Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmexpat/lib/xmlparse.c
3153 views
1
/* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+)
2
__ __ _
3
___\ \/ /_ __ __ _| |_
4
/ _ \\ /| '_ \ / _` | __|
5
| __// \| |_) | (_| | |_
6
\___/_/\_\ .__/ \__,_|\__|
7
|_| XML parser
8
9
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
Copyright (c) 2000 Clark Cooper <[email protected]>
11
Copyright (c) 2000-2006 Fred L. Drake, Jr. <[email protected]>
12
Copyright (c) 2001-2002 Greg Stein <[email protected]>
13
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
14
Copyright (c) 2005-2009 Steven Solie <[email protected]>
15
Copyright (c) 2016 Eric Rahm <[email protected]>
16
Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
17
Copyright (c) 2016 Gaurav <[email protected]>
18
Copyright (c) 2016 Thomas Beutlich <[email protected]>
19
Copyright (c) 2016 Gustavo Grieco <[email protected]>
20
Copyright (c) 2016 Pascal Cuoq <[email protected]>
21
Copyright (c) 2016 Ed Schouten <[email protected]>
22
Copyright (c) 2017-2022 Rhodri James <[email protected]>
23
Copyright (c) 2017 Václav Slavík <[email protected]>
24
Copyright (c) 2017 Viktor Szakats <[email protected]>
25
Copyright (c) 2017 Chanho Park <[email protected]>
26
Copyright (c) 2017 Rolf Eike Beer <[email protected]>
27
Copyright (c) 2017 Hans Wennborg <[email protected]>
28
Copyright (c) 2018 Anton Maklakov <[email protected]>
29
Copyright (c) 2018 Benjamin Peterson <[email protected]>
30
Copyright (c) 2018 Marco Maggi <[email protected]>
31
Copyright (c) 2018 Mariusz Zaborski <[email protected]>
32
Copyright (c) 2019 David Loffredo <[email protected]>
33
Copyright (c) 2019-2020 Ben Wagner <[email protected]>
34
Copyright (c) 2019 Vadim Zeitlin <[email protected]>
35
Copyright (c) 2021 Donghee Na <[email protected]>
36
Copyright (c) 2022 Samanta Navarro <[email protected]>
37
Copyright (c) 2022 Jeffrey Walton <[email protected]>
38
Copyright (c) 2022 Jann Horn <[email protected]>
39
Copyright (c) 2022 Sean McBride <[email protected]>
40
Copyright (c) 2023 Owain Davies <[email protected]>
41
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
42
Licensed under the MIT license:
43
44
Permission is hereby granted, free of charge, to any person obtaining
45
a copy of this software and associated documentation files (the
46
"Software"), to deal in the Software without restriction, including
47
without limitation the rights to use, copy, modify, merge, publish,
48
distribute, sublicense, and/or sell copies of the Software, and to permit
49
persons to whom the Software is furnished to do so, subject to the
50
following conditions:
51
52
The above copyright notice and this permission notice shall be included
53
in all copies or substantial portions of the Software.
54
55
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
56
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
58
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
59
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
60
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
61
USE OR OTHER DEALINGS IN THE SOFTWARE.
62
*/
63
64
#define XML_BUILDING_EXPAT 1
65
66
#include "expat_config.h"
67
68
#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
69
# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
70
#endif
71
72
#if defined(XML_DTD) && XML_GE == 0
73
# error Either undefine XML_DTD or define XML_GE to 1.
74
#endif
75
76
#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
77
|| (XML_CONTEXT_BYTES + 0 < 0)
78
# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
79
#endif
80
81
#if defined(HAVE_SYSCALL_GETRANDOM)
82
# if ! defined(_GNU_SOURCE)
83
# define _GNU_SOURCE 1 /* syscall prototype */
84
# endif
85
#endif
86
87
#ifdef _WIN32
88
/* force stdlib to define rand_s() */
89
# if ! defined(_CRT_RAND_S)
90
# define _CRT_RAND_S
91
# endif
92
#endif
93
94
#include <stdbool.h>
95
#include <stddef.h>
96
#include <string.h> /* memset(), memcpy() */
97
#include <assert.h>
98
#include <limits.h> /* UINT_MAX */
99
#include <stdio.h> /* fprintf */
100
#include <stdlib.h> /* getenv, rand_s */
101
#include <stdint.h> /* uintptr_t */
102
#include <math.h> /* isnan */
103
104
#ifdef _WIN32
105
# define getpid GetCurrentProcessId
106
#else
107
# include <sys/time.h> /* gettimeofday() */
108
# include <sys/types.h> /* getpid() */
109
# include <unistd.h> /* getpid() */
110
# include <fcntl.h> /* O_RDONLY */
111
# include <errno.h>
112
#endif
113
114
#ifdef _WIN32
115
# include "winconfig.h"
116
#endif
117
118
#include "ascii.h"
119
#include "expat.h"
120
#include "siphash.h"
121
122
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
123
# if defined(HAVE_GETRANDOM)
124
# include <sys/random.h> /* getrandom */
125
# else
126
# include <unistd.h> /* syscall */
127
# include <sys/syscall.h> /* SYS_getrandom */
128
# endif
129
# if ! defined(GRND_NONBLOCK)
130
# define GRND_NONBLOCK 0x0001
131
# endif /* defined(GRND_NONBLOCK) */
132
#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
133
134
#if defined(HAVE_LIBBSD) \
135
&& (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
136
# include <bsd/stdlib.h>
137
#endif
138
139
#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
140
# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
141
#endif
142
143
#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
144
&& ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
145
&& ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
146
&& ! defined(XML_POOR_ENTROPY)
147
# error You do not have support for any sources of high quality entropy \
148
enabled. For end user security, that is probably not what you want. \
149
\
150
Your options include: \
151
* Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
152
* Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
153
* BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
154
* BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
155
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
156
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
157
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
158
* Windows >=Vista (rand_s): _WIN32. \
159
\
160
If insist on not using any of these, bypass this error by defining \
161
XML_POOR_ENTROPY; you have been warned. \
162
\
163
If you have reasons to patch this detection code away or need changes \
164
to the build system, please open a bug. Thank you!
165
#endif
166
167
#ifdef XML_UNICODE
168
# define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
169
# define XmlConvert XmlUtf16Convert
170
# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
171
# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
172
# define XmlEncode XmlUtf16Encode
173
# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
174
typedef unsigned short ICHAR;
175
#else
176
# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
177
# define XmlConvert XmlUtf8Convert
178
# define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
179
# define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
180
# define XmlEncode XmlUtf8Encode
181
# define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
182
typedef char ICHAR;
183
#endif
184
185
#ifndef XML_NS
186
187
# define XmlInitEncodingNS XmlInitEncoding
188
# define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
189
# undef XmlGetInternalEncodingNS
190
# define XmlGetInternalEncodingNS XmlGetInternalEncoding
191
# define XmlParseXmlDeclNS XmlParseXmlDecl
192
193
#endif
194
195
#ifdef XML_UNICODE
196
197
# ifdef XML_UNICODE_WCHAR_T
198
# define XML_T(x) (const wchar_t) x
199
# define XML_L(x) L##x
200
# else
201
# define XML_T(x) (const unsigned short)x
202
# define XML_L(x) x
203
# endif
204
205
#else
206
207
# define XML_T(x) x
208
# define XML_L(x) x
209
210
#endif
211
212
/* Round up n to be a multiple of sz, where sz is a power of 2. */
213
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
214
215
/* Do safe (NULL-aware) pointer arithmetic */
216
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
217
218
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
219
220
#include "internal.h"
221
#include "xmltok.h"
222
#include "xmlrole.h"
223
224
typedef const XML_Char *KEY;
225
226
typedef struct {
227
KEY name;
228
} NAMED;
229
230
typedef struct {
231
NAMED **v;
232
unsigned char power;
233
size_t size;
234
size_t used;
235
const XML_Memory_Handling_Suite *mem;
236
} HASH_TABLE;
237
238
static size_t keylen(KEY s);
239
240
static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
241
242
/* For probing (after a collision) we need a step size relatively prime
243
to the hash table size, which is a power of 2. We use double-hashing,
244
since we can calculate a second hash value cheaply by taking those bits
245
of the first hash value that were discarded (masked out) when the table
246
index was calculated: index = hash & mask, where mask = table->size - 1.
247
We limit the maximum step size to table->size / 4 (mask >> 2) and make
248
it odd, since odd numbers are always relatively prime to a power of 2.
249
*/
250
#define SECOND_HASH(hash, mask, power) \
251
((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
252
#define PROBE_STEP(hash, mask, power) \
253
((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
254
255
typedef struct {
256
NAMED **p;
257
NAMED **end;
258
} HASH_TABLE_ITER;
259
260
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
261
#define INIT_DATA_BUF_SIZE 1024
262
#define INIT_ATTS_SIZE 16
263
#define INIT_ATTS_VERSION 0xFFFFFFFF
264
#define INIT_BLOCK_SIZE 1024
265
#define INIT_BUFFER_SIZE 1024
266
267
#define EXPAND_SPARE 24
268
269
typedef struct binding {
270
struct prefix *prefix;
271
struct binding *nextTagBinding;
272
struct binding *prevPrefixBinding;
273
const struct attribute_id *attId;
274
XML_Char *uri;
275
int uriLen;
276
int uriAlloc;
277
} BINDING;
278
279
typedef struct prefix {
280
const XML_Char *name;
281
BINDING *binding;
282
} PREFIX;
283
284
typedef struct {
285
const XML_Char *str;
286
const XML_Char *localPart;
287
const XML_Char *prefix;
288
int strLen;
289
int uriLen;
290
int prefixLen;
291
} TAG_NAME;
292
293
/* TAG represents an open element.
294
The name of the element is stored in both the document and API
295
encodings. The memory buffer 'buf' is a separately-allocated
296
memory area which stores the name. During the XML_Parse()/
297
XML_ParseBuffer() when the element is open, the memory for the 'raw'
298
version of the name (in the document encoding) is shared with the
299
document buffer. If the element is open across calls to
300
XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
301
contain the 'raw' name as well.
302
303
A parser reuses these structures, maintaining a list of allocated
304
TAG objects in a free list.
305
*/
306
typedef struct tag {
307
struct tag *parent; /* parent of this element */
308
const char *rawName; /* tagName in the original encoding */
309
int rawNameLength;
310
TAG_NAME name; /* tagName in the API encoding */
311
char *buf; /* buffer for name components */
312
char *bufEnd; /* end of the buffer */
313
BINDING *bindings;
314
} TAG;
315
316
typedef struct {
317
const XML_Char *name;
318
const XML_Char *textPtr;
319
int textLen; /* length in XML_Chars */
320
int processed; /* # of processed bytes - when suspended */
321
const XML_Char *systemId;
322
const XML_Char *base;
323
const XML_Char *publicId;
324
const XML_Char *notation;
325
XML_Bool open;
326
XML_Bool is_param;
327
XML_Bool is_internal; /* true if declared in internal subset outside PE */
328
} ENTITY;
329
330
typedef struct {
331
enum XML_Content_Type type;
332
enum XML_Content_Quant quant;
333
const XML_Char *name;
334
int firstchild;
335
int lastchild;
336
int childcnt;
337
int nextsib;
338
} CONTENT_SCAFFOLD;
339
340
#define INIT_SCAFFOLD_ELEMENTS 32
341
342
typedef struct block {
343
struct block *next;
344
int size;
345
XML_Char s[1];
346
} BLOCK;
347
348
typedef struct {
349
BLOCK *blocks;
350
BLOCK *freeBlocks;
351
const XML_Char *end;
352
XML_Char *ptr;
353
XML_Char *start;
354
const XML_Memory_Handling_Suite *mem;
355
} STRING_POOL;
356
357
/* The XML_Char before the name is used to determine whether
358
an attribute has been specified. */
359
typedef struct attribute_id {
360
XML_Char *name;
361
PREFIX *prefix;
362
XML_Bool maybeTokenized;
363
XML_Bool xmlns;
364
} ATTRIBUTE_ID;
365
366
typedef struct {
367
const ATTRIBUTE_ID *id;
368
XML_Bool isCdata;
369
const XML_Char *value;
370
} DEFAULT_ATTRIBUTE;
371
372
typedef struct {
373
unsigned long version;
374
unsigned long hash;
375
const XML_Char *uriName;
376
} NS_ATT;
377
378
typedef struct {
379
const XML_Char *name;
380
PREFIX *prefix;
381
const ATTRIBUTE_ID *idAtt;
382
int nDefaultAtts;
383
int allocDefaultAtts;
384
DEFAULT_ATTRIBUTE *defaultAtts;
385
} ELEMENT_TYPE;
386
387
typedef struct {
388
HASH_TABLE generalEntities;
389
HASH_TABLE elementTypes;
390
HASH_TABLE attributeIds;
391
HASH_TABLE prefixes;
392
STRING_POOL pool;
393
STRING_POOL entityValuePool;
394
/* false once a parameter entity reference has been skipped */
395
XML_Bool keepProcessing;
396
/* true once an internal or external PE reference has been encountered;
397
this includes the reference to an external subset */
398
XML_Bool hasParamEntityRefs;
399
XML_Bool standalone;
400
#ifdef XML_DTD
401
/* indicates if external PE has been read */
402
XML_Bool paramEntityRead;
403
HASH_TABLE paramEntities;
404
#endif /* XML_DTD */
405
PREFIX defaultPrefix;
406
/* === scaffolding for building content model === */
407
XML_Bool in_eldecl;
408
CONTENT_SCAFFOLD *scaffold;
409
unsigned contentStringLen;
410
unsigned scaffSize;
411
unsigned scaffCount;
412
int scaffLevel;
413
int *scaffIndex;
414
} DTD;
415
416
typedef struct open_internal_entity {
417
const char *internalEventPtr;
418
const char *internalEventEndPtr;
419
struct open_internal_entity *next;
420
ENTITY *entity;
421
int startTagLevel;
422
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
423
} OPEN_INTERNAL_ENTITY;
424
425
enum XML_Account {
  /* bytes that were passed to the Expat parser directly */
  XML_ACCOUNT_DIRECT,
  /* intermediate bytes that came out of entity expansion */
  XML_ACCOUNT_ENTITY_EXPANSION,
  /* do not account: these bytes have been accounted already */
  XML_ACCOUNT_NONE
};
431
432
#if XML_GE == 1
433
typedef unsigned long long XmlBigCount;
434
typedef struct accounting {
435
XmlBigCount countBytesDirect;
436
XmlBigCount countBytesIndirect;
437
unsigned long debugLevel;
438
float maximumAmplificationFactor; // >=1.0
439
unsigned long long activationThresholdBytes;
440
} ACCOUNTING;
441
442
typedef struct entity_stats {
443
unsigned int countEverOpened;
444
unsigned int currentDepth;
445
unsigned int maximumDepthSeen;
446
unsigned long debugLevel;
447
} ENTITY_STATS;
448
#endif /* XML_GE == 1 */
449
450
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
451
const char *end, const char **endPtr);
452
453
static Processor prologProcessor;
454
static Processor prologInitProcessor;
455
static Processor contentProcessor;
456
static Processor cdataSectionProcessor;
457
#ifdef XML_DTD
458
static Processor ignoreSectionProcessor;
459
static Processor externalParEntProcessor;
460
static Processor externalParEntInitProcessor;
461
static Processor entityValueProcessor;
462
static Processor entityValueInitProcessor;
463
#endif /* XML_DTD */
464
static Processor epilogProcessor;
465
static Processor errorProcessor;
466
static Processor externalEntityInitProcessor;
467
static Processor externalEntityInitProcessor2;
468
static Processor externalEntityInitProcessor3;
469
static Processor externalEntityContentProcessor;
470
static Processor internalEntityProcessor;
471
472
static enum XML_Error handleUnknownEncoding(XML_Parser parser,
473
const XML_Char *encodingName);
474
static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
475
const char *s, const char *next);
476
static enum XML_Error initializeEncoding(XML_Parser parser);
477
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
478
const char *s, const char *end, int tok,
479
const char *next, const char **nextPtr,
480
XML_Bool haveMore, XML_Bool allowClosingDoctype,
481
enum XML_Account account);
482
static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
483
XML_Bool betweenDecl);
484
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
485
const ENCODING *enc, const char *start,
486
const char *end, const char **endPtr,
487
XML_Bool haveMore, enum XML_Account account);
488
static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
489
const char **startPtr, const char *end,
490
const char **nextPtr, XML_Bool haveMore,
491
enum XML_Account account);
492
#ifdef XML_DTD
493
static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
494
const char **startPtr, const char *end,
495
const char **nextPtr, XML_Bool haveMore);
496
#endif /* XML_DTD */
497
498
static void freeBindings(XML_Parser parser, BINDING *bindings);
499
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
500
const char *attStr, TAG_NAME *tagNamePtr,
501
BINDING **bindingsPtr,
502
enum XML_Account account);
503
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
504
const ATTRIBUTE_ID *attId, const XML_Char *uri,
505
BINDING **bindingsPtr);
506
static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
507
XML_Bool isCdata, XML_Bool isId,
508
const XML_Char *value, XML_Parser parser);
509
static enum XML_Error storeAttributeValue(XML_Parser parser,
510
const ENCODING *enc, XML_Bool isCdata,
511
const char *ptr, const char *end,
512
STRING_POOL *pool,
513
enum XML_Account account);
514
static enum XML_Error appendAttributeValue(XML_Parser parser,
515
const ENCODING *enc,
516
XML_Bool isCdata, const char *ptr,
517
const char *end, STRING_POOL *pool,
518
enum XML_Account account);
519
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
520
const char *start, const char *end);
521
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
522
#if XML_GE == 1
523
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
524
const char *start, const char *end,
525
enum XML_Account account);
526
#else
527
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
528
#endif
529
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
530
const char *start, const char *end);
531
static int reportComment(XML_Parser parser, const ENCODING *enc,
532
const char *start, const char *end);
533
static void reportDefault(XML_Parser parser, const ENCODING *enc,
534
const char *start, const char *end);
535
536
static const XML_Char *getContext(XML_Parser parser);
537
static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
538
539
static void FASTCALL normalizePublicId(XML_Char *s);
540
541
static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
542
/* do not call if m_parentParser != NULL */
543
static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
544
static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
545
const XML_Memory_Handling_Suite *ms);
546
static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
547
const XML_Memory_Handling_Suite *ms);
548
static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
549
STRING_POOL *newPool, const HASH_TABLE *oldTable);
550
static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
551
size_t createSize);
552
static void FASTCALL hashTableInit(HASH_TABLE *table,
553
const XML_Memory_Handling_Suite *ms);
554
static void FASTCALL hashTableClear(HASH_TABLE *table);
555
static void FASTCALL hashTableDestroy(HASH_TABLE *table);
556
static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
557
const HASH_TABLE *table);
558
static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
559
560
static void FASTCALL poolInit(STRING_POOL *pool,
561
const XML_Memory_Handling_Suite *ms);
562
static void FASTCALL poolClear(STRING_POOL *pool);
563
static void FASTCALL poolDestroy(STRING_POOL *pool);
564
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
565
const char *ptr, const char *end);
566
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
567
const char *ptr, const char *end);
568
static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
569
static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
570
const XML_Char *s);
571
static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
572
int n);
573
static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
574
const XML_Char *s);
575
576
static int FASTCALL nextScaffoldPart(XML_Parser parser);
577
static XML_Content *build_model(XML_Parser parser);
578
static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
579
const char *ptr, const char *end);
580
581
static XML_Char *copyString(const XML_Char *s,
582
const XML_Memory_Handling_Suite *memsuite);
583
584
static unsigned long generate_hash_secret_salt(XML_Parser parser);
585
static XML_Bool startParsing(XML_Parser parser);
586
587
static XML_Parser parserCreate(const XML_Char *encodingName,
588
const XML_Memory_Handling_Suite *memsuite,
589
const XML_Char *nameSep, DTD *dtd);
590
591
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
592
593
#if XML_GE == 1
594
static float accountingGetCurrentAmplification(XML_Parser rootParser);
595
static void accountingReportStats(XML_Parser originParser, const char *epilog);
596
static void accountingOnAbort(XML_Parser originParser);
597
static void accountingReportDiff(XML_Parser rootParser,
598
unsigned int levelsAwayFromRootParser,
599
const char *before, const char *after,
600
ptrdiff_t bytesMore, int source_line,
601
enum XML_Account account);
602
static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
603
const char *before, const char *after,
604
int source_line,
605
enum XML_Account account);
606
607
static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
608
const char *action, int sourceLine);
609
static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
610
int sourceLine);
611
static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
612
int sourceLine);
613
614
static XML_Parser getRootParserOf(XML_Parser parser,
615
unsigned int *outLevelDiff);
616
#endif /* XML_GE == 1 */
617
618
static unsigned long getDebugLevel(const char *variableName,
619
unsigned long defaultDebugLevel);
620
621
/* Accessor macros for a STRING_POOL (all take a pointer to the pool).
   Every use of a macro parameter is parenthesized so that any pointer
   expression (e.g. "pools + 1" or "cond ? a : b") can be passed safely.
   Note that the parameters are evaluated more than once; do not pass
   expressions with side effects.

   poolStart      - the pool's 'start' pointer
   poolLength     - number of XML_Chars between 'start' and 'ptr'
   poolChop       - step 'ptr' back by one XML_Char
   poolLastChar   - the XML_Char immediately before 'ptr'
   poolDiscard    - reset 'ptr' back to 'start'
   poolFinish     - advance 'start' up to 'ptr'
   poolAppendChar - store c at 'ptr' and advance; when 'ptr' has reached
                    'end', poolGrow() is called first.  Yields 0 if
                    poolGrow() failed, 1 otherwise. */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
631
632
#if ! defined(XML_TESTING)
633
const
634
#endif
635
XML_Bool g_reparseDeferralEnabledDefault
636
= XML_TRUE; // write ONLY in runtests.c
637
#if defined(XML_TESTING)
638
unsigned int g_bytesScanned = 0; // used for testing only
639
#endif
640
641
struct XML_ParserStruct {
642
/* The first member must be m_userData so that the XML_GetUserData
643
macro works. */
644
void *m_userData;
645
void *m_handlerArg;
646
647
// How the four parse buffer pointers below relate in time and space:
648
//
649
// m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
650
// | | | |
651
// <--parsed-->| | |
652
// <---parsing--->| |
653
// <--unoccupied-->|
654
// <---------total-malloced/realloced-------->|
655
656
char *m_buffer; // malloc/realloc base pointer of parse buffer
657
const XML_Memory_Handling_Suite m_mem;
658
const char *m_bufferPtr; // first character to be parsed
659
char *m_bufferEnd; // past last character to be parsed
660
const char *m_bufferLim; // allocated end of m_buffer
661
662
XML_Index m_parseEndByteIndex;
663
const char *m_parseEndPtr;
664
size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
665
XML_Bool m_reparseDeferralEnabled;
666
int m_lastBufferRequestSize;
667
XML_Char *m_dataBuf;
668
XML_Char *m_dataBufEnd;
669
XML_StartElementHandler m_startElementHandler;
670
XML_EndElementHandler m_endElementHandler;
671
XML_CharacterDataHandler m_characterDataHandler;
672
XML_ProcessingInstructionHandler m_processingInstructionHandler;
673
XML_CommentHandler m_commentHandler;
674
XML_StartCdataSectionHandler m_startCdataSectionHandler;
675
XML_EndCdataSectionHandler m_endCdataSectionHandler;
676
XML_DefaultHandler m_defaultHandler;
677
XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
678
XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
679
XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
680
XML_NotationDeclHandler m_notationDeclHandler;
681
XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
682
XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
683
XML_NotStandaloneHandler m_notStandaloneHandler;
684
XML_ExternalEntityRefHandler m_externalEntityRefHandler;
685
XML_Parser m_externalEntityRefHandlerArg;
686
XML_SkippedEntityHandler m_skippedEntityHandler;
687
XML_UnknownEncodingHandler m_unknownEncodingHandler;
688
XML_ElementDeclHandler m_elementDeclHandler;
689
XML_AttlistDeclHandler m_attlistDeclHandler;
690
XML_EntityDeclHandler m_entityDeclHandler;
691
XML_XmlDeclHandler m_xmlDeclHandler;
692
const ENCODING *m_encoding;
693
INIT_ENCODING m_initEncoding;
694
const ENCODING *m_internalEncoding;
695
const XML_Char *m_protocolEncodingName;
696
XML_Bool m_ns;
697
XML_Bool m_ns_triplets;
698
void *m_unknownEncodingMem;
699
void *m_unknownEncodingData;
700
void *m_unknownEncodingHandlerData;
701
void(XMLCALL *m_unknownEncodingRelease)(void *);
702
PROLOG_STATE m_prologState;
703
Processor *m_processor;
704
enum XML_Error m_errorCode;
705
const char *m_eventPtr;
706
const char *m_eventEndPtr;
707
const char *m_positionPtr;
708
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
709
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
710
XML_Bool m_defaultExpandInternalEntities;
711
int m_tagLevel;
712
ENTITY *m_declEntity;
713
const XML_Char *m_doctypeName;
714
const XML_Char *m_doctypeSysid;
715
const XML_Char *m_doctypePubid;
716
const XML_Char *m_declAttributeType;
717
const XML_Char *m_declNotationName;
718
const XML_Char *m_declNotationPublicId;
719
ELEMENT_TYPE *m_declElementType;
720
ATTRIBUTE_ID *m_declAttributeId;
721
XML_Bool m_declAttributeIsCdata;
722
XML_Bool m_declAttributeIsId;
723
DTD *m_dtd;
724
const XML_Char *m_curBase;
725
TAG *m_tagStack;
726
TAG *m_freeTagList;
727
BINDING *m_inheritedBindings;
728
BINDING *m_freeBindingList;
729
int m_attsSize;
730
int m_nSpecifiedAtts;
731
int m_idAttIndex;
732
ATTRIBUTE *m_atts;
733
NS_ATT *m_nsAtts;
734
unsigned long m_nsAttsVersion;
735
unsigned char m_nsAttsPower;
736
#ifdef XML_ATTR_INFO
737
XML_AttrInfo *m_attInfo;
738
#endif
739
POSITION m_position;
740
STRING_POOL m_tempPool;
741
STRING_POOL m_temp2Pool;
742
char *m_groupConnector;
743
unsigned int m_groupSize;
744
XML_Char m_namespaceSeparator;
745
XML_Parser m_parentParser;
746
XML_ParsingStatus m_parsingStatus;
747
#ifdef XML_DTD
748
XML_Bool m_isParamEntity;
749
XML_Bool m_useForeignDTD;
750
enum XML_ParamEntityParsing m_paramEntityParsing;
751
#endif
752
unsigned long m_hash_secret_salt;
753
#if XML_GE == 1
754
ACCOUNTING m_accounting;
755
ENTITY_STATS m_entity_stats;
756
#endif
757
};
758
759
/* Allocation helpers that route through the memory handling suite stored
   in the parser's m_mem member.  'parser' is a pointer to the parser
   structure; it is parenthesized so that arbitrary pointer expressions
   (e.g. "&obj" or "cond ? a : b") expand correctly. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
762
763
XML_Parser XMLCALL
764
XML_ParserCreate(const XML_Char *encodingName) {
765
return XML_ParserCreate_MM(encodingName, NULL, NULL);
766
}
767
768
XML_Parser XMLCALL
769
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
770
XML_Char tmp[2] = {nsSep, 0};
771
return XML_ParserCreate_MM(encodingName, NULL, tmp);
772
}
773
774
// "xml=http://www.w3.org/XML/1998/namespace"
775
static const XML_Char implicitContext[]
776
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
777
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
778
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
779
ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
780
ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
781
ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
782
ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
783
ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
784
'\0'};
785
786
/* To avoid warnings about unused functions: */
787
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
788
789
# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
790
791
/* Obtain entropy on Linux 3.17+ through getrandom (either the libc wrapper
 * or the raw syscall), using GRND_NONBLOCK.
 *
 * target: buffer that receives the random bytes
 * count:  number of bytes wanted
 *
 * Returns 1 once all count bytes have been written, 0 otherwise.
 *
 * errno is only consulted after a call that actually failed: a successful
 * call does not modify errno, so looking at it after a short or empty
 * result would act on a stale value (dropping a partial result, or
 * spinning forever on a leftover EINTR).
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough for both the ssize_t of getrandom() and the
       long returned by syscall(); int would narrow the result */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1;
      continue; /* short result: request the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted by a signal before any byte arrived */

    return 0; /* hard failure, or no progress was made */
  }
}
818
819
# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
820
821
# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
822
823
/* Extract entropy from /dev/urandom.
 *
 * target: buffer that receives the random bytes
 * count:  number of bytes wanted
 *
 * Returns 1 once all count bytes have been read, 0 if the device could not
 * be opened or reading stopped early.  The descriptor is closed on every
 * path after a successful open.
 *
 * errno is only consulted after read() returned a negative value: a
 * successful read() does not modify errno, so checking it after a short or
 * empty read would act on a stale value.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      /* short read: go around again for the remainder */
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      break; /* hard error, or a read that made no progress */
    }
  }

  close(fd);
  return success;
}
850
851
# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
852
853
#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
854
855
#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
856
857
/* Fills the `count` bytes at `target` with output of arc4random().
   Each 32-bit random word is consumed least significant byte first;
   surplus bytes of the last word are discarded. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - written;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += chunk;
  }
}
872
873
#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
874
875
#ifdef _WIN32
876
877
/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
878
as it didn't declare it in its header prior to version 5.3.0 of its
879
runtime package (mingwrt, containing stdlib.h). The upstream fix
880
was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
881
# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
882
&& __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
883
__declspec(dllimport) int rand_s(unsigned int *);
884
# endif
885
886
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills the `count` bytes at `target`; each random word is consumed
 * least significant byte first.  Returns 1 on success and 0 as soon as
 * rand_s() reports a failure.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    const size_t remaining = count - written;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (k = 0; k < chunk; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += chunk;
  }
  return 1; /* success */
}
909
910
#endif /* _WIN32 */
911
912
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
913
914
/* Low quality entropy derived from the current time.
   On Windows: XOR of the two halves of the system time as FILETIME.
   Elsewhere: the microseconds part of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
# ifdef _WIN32
  FILETIME fileTime;
  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
# else
  struct timeval now;
  const int gettimeofday_res = gettimeofday(&now, NULL);

#  if defined(NDEBUG)
  (void)gettimeofday_res; /* only inspected by the assertion below */
#  else
  assert(gettimeofday_res == 0);
#  endif /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
# endif
}
936
937
#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
938
939
/* Pass-through helper: returns `entropy` unchanged.  When the debug level
   obtained for "EXPAT_ENTROPY_DEBUG" is at least 1, the value and the
   name of the provider that produced it are also printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
947
948
static unsigned long
949
generate_hash_secret_salt(XML_Parser parser) {
950
unsigned long entropy;
951
(void)parser;
952
953
/* "Failproof" high quality providers: */
954
#if defined(HAVE_ARC4RANDOM_BUF)
955
arc4random_buf(&entropy, sizeof(entropy));
956
return ENTROPY_DEBUG("arc4random_buf", entropy);
957
#elif defined(HAVE_ARC4RANDOM)
958
writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
959
return ENTROPY_DEBUG("arc4random", entropy);
960
#else
961
/* Try high quality providers first .. */
962
# ifdef _WIN32
963
if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
964
return ENTROPY_DEBUG("rand_s", entropy);
965
}
966
# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
967
if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
968
return ENTROPY_DEBUG("getrandom", entropy);
969
}
970
# endif
971
# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
972
if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
973
return ENTROPY_DEBUG("/dev/urandom", entropy);
974
}
975
# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
976
/* .. and self-made low quality for backup: */
977
978
/* Process ID is 0 bits entropy if attacker has local access */
979
entropy = gather_time_entropy() ^ getpid();
980
981
/* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
982
if (sizeof(unsigned long) == 4) {
983
return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
984
} else {
985
return ENTROPY_DEBUG("fallback(8)",
986
entropy * (unsigned long)2305843009213693951ULL);
987
}
988
#endif
989
}
990
991
static unsigned long
992
get_hash_secret_salt(XML_Parser parser) {
993
if (parser->m_parentParser != NULL)
994
return get_hash_secret_salt(parser->m_parentParser);
995
return parser->m_hash_secret_salt;
996
}
997
998
static enum XML_Error
999
callProcessor(XML_Parser parser, const char *start, const char *end,
1000
const char **endPtr) {
1001
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1002
1003
if (parser->m_reparseDeferralEnabled
1004
&& ! parser->m_parsingStatus.finalBuffer) {
1005
// Heuristic: don't try to parse a partial token again until the amount of
1006
// available data has increased significantly.
1007
const size_t had_before = parser->m_partialTokenBytesBefore;
1008
// ...but *do* try anyway if we're close to causing a reallocation.
1009
size_t available_buffer
1010
= EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1011
#if XML_CONTEXT_BYTES > 0
1012
available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1013
#endif
1014
available_buffer
1015
+= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1016
// m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1017
const bool enough
1018
= (have_now >= 2 * had_before)
1019
|| ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1020
1021
if (! enough) {
1022
*endPtr = start; // callers may expect this to be set
1023
return XML_ERROR_NONE;
1024
}
1025
}
1026
#if defined(XML_TESTING)
1027
g_bytesScanned += (unsigned)have_now;
1028
#endif
1029
const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1030
if (ret == XML_ERROR_NONE) {
1031
// if we consumed nothing, remember what we had on this parse attempt.
1032
if (*endPtr == start) {
1033
parser->m_partialTokenBytesBefore = have_now;
1034
} else {
1035
parser->m_partialTokenBytesBefore = 0;
1036
}
1037
}
1038
return ret;
1039
}
1040
1041
static XML_Bool /* only valid for root parser */
1042
startParsing(XML_Parser parser) {
1043
/* hash functions must be initialized before setContext() is called */
1044
if (parser->m_hash_secret_salt == 0)
1045
parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1046
if (parser->m_ns) {
1047
/* implicit context only set for root parser, since child
1048
parsers (i.e. external entity parsers) will inherit it
1049
*/
1050
return setContext(parser, implicitContext);
1051
}
1052
return XML_TRUE;
1053
}
1054
1055
XML_Parser XMLCALL
1056
XML_ParserCreate_MM(const XML_Char *encodingName,
1057
const XML_Memory_Handling_Suite *memsuite,
1058
const XML_Char *nameSep) {
1059
return parserCreate(encodingName, memsuite, nameSep, NULL);
1060
}
1061
1062
static XML_Parser
1063
parserCreate(const XML_Char *encodingName,
1064
const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1065
DTD *dtd) {
1066
XML_Parser parser;
1067
1068
if (memsuite) {
1069
XML_Memory_Handling_Suite *mtemp;
1070
parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1071
if (parser != NULL) {
1072
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1073
mtemp->malloc_fcn = memsuite->malloc_fcn;
1074
mtemp->realloc_fcn = memsuite->realloc_fcn;
1075
mtemp->free_fcn = memsuite->free_fcn;
1076
}
1077
} else {
1078
XML_Memory_Handling_Suite *mtemp;
1079
parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1080
if (parser != NULL) {
1081
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1082
mtemp->malloc_fcn = malloc;
1083
mtemp->realloc_fcn = realloc;
1084
mtemp->free_fcn = free;
1085
}
1086
}
1087
1088
if (! parser)
1089
return parser;
1090
1091
parser->m_buffer = NULL;
1092
parser->m_bufferLim = NULL;
1093
1094
parser->m_attsSize = INIT_ATTS_SIZE;
1095
parser->m_atts
1096
= (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1097
if (parser->m_atts == NULL) {
1098
FREE(parser, parser);
1099
return NULL;
1100
}
1101
#ifdef XML_ATTR_INFO
1102
parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1103
parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1104
if (parser->m_attInfo == NULL) {
1105
FREE(parser, parser->m_atts);
1106
FREE(parser, parser);
1107
return NULL;
1108
}
1109
#endif
1110
parser->m_dataBuf
1111
= (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1112
if (parser->m_dataBuf == NULL) {
1113
FREE(parser, parser->m_atts);
1114
#ifdef XML_ATTR_INFO
1115
FREE(parser, parser->m_attInfo);
1116
#endif
1117
FREE(parser, parser);
1118
return NULL;
1119
}
1120
parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1121
1122
if (dtd)
1123
parser->m_dtd = dtd;
1124
else {
1125
parser->m_dtd = dtdCreate(&parser->m_mem);
1126
if (parser->m_dtd == NULL) {
1127
FREE(parser, parser->m_dataBuf);
1128
FREE(parser, parser->m_atts);
1129
#ifdef XML_ATTR_INFO
1130
FREE(parser, parser->m_attInfo);
1131
#endif
1132
FREE(parser, parser);
1133
return NULL;
1134
}
1135
}
1136
1137
parser->m_freeBindingList = NULL;
1138
parser->m_freeTagList = NULL;
1139
parser->m_freeInternalEntities = NULL;
1140
1141
parser->m_groupSize = 0;
1142
parser->m_groupConnector = NULL;
1143
1144
parser->m_unknownEncodingHandler = NULL;
1145
parser->m_unknownEncodingHandlerData = NULL;
1146
1147
parser->m_namespaceSeparator = ASCII_EXCL;
1148
parser->m_ns = XML_FALSE;
1149
parser->m_ns_triplets = XML_FALSE;
1150
1151
parser->m_nsAtts = NULL;
1152
parser->m_nsAttsVersion = 0;
1153
parser->m_nsAttsPower = 0;
1154
1155
parser->m_protocolEncodingName = NULL;
1156
1157
poolInit(&parser->m_tempPool, &(parser->m_mem));
1158
poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1159
parserInit(parser, encodingName);
1160
1161
if (encodingName && ! parser->m_protocolEncodingName) {
1162
if (dtd) {
1163
// We need to stop the upcoming call to XML_ParserFree from happily
1164
// destroying parser->m_dtd because the DTD is shared with the parent
1165
// parser and the only guard that keeps XML_ParserFree from destroying
1166
// parser->m_dtd is parser->m_isParamEntity but it will be set to
1167
// XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1168
parser->m_dtd = NULL;
1169
}
1170
XML_ParserFree(parser);
1171
return NULL;
1172
}
1173
1174
if (nameSep) {
1175
parser->m_ns = XML_TRUE;
1176
parser->m_internalEncoding = XmlGetInternalEncodingNS();
1177
parser->m_namespaceSeparator = *nameSep;
1178
} else {
1179
parser->m_internalEncoding = XmlGetInternalEncoding();
1180
}
1181
1182
return parser;
1183
}
1184
1185
static void
1186
parserInit(XML_Parser parser, const XML_Char *encodingName) {
1187
parser->m_processor = prologInitProcessor;
1188
XmlPrologStateInit(&parser->m_prologState);
1189
if (encodingName != NULL) {
1190
parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1191
}
1192
parser->m_curBase = NULL;
1193
XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1194
parser->m_userData = NULL;
1195
parser->m_handlerArg = NULL;
1196
parser->m_startElementHandler = NULL;
1197
parser->m_endElementHandler = NULL;
1198
parser->m_characterDataHandler = NULL;
1199
parser->m_processingInstructionHandler = NULL;
1200
parser->m_commentHandler = NULL;
1201
parser->m_startCdataSectionHandler = NULL;
1202
parser->m_endCdataSectionHandler = NULL;
1203
parser->m_defaultHandler = NULL;
1204
parser->m_startDoctypeDeclHandler = NULL;
1205
parser->m_endDoctypeDeclHandler = NULL;
1206
parser->m_unparsedEntityDeclHandler = NULL;
1207
parser->m_notationDeclHandler = NULL;
1208
parser->m_startNamespaceDeclHandler = NULL;
1209
parser->m_endNamespaceDeclHandler = NULL;
1210
parser->m_notStandaloneHandler = NULL;
1211
parser->m_externalEntityRefHandler = NULL;
1212
parser->m_externalEntityRefHandlerArg = parser;
1213
parser->m_skippedEntityHandler = NULL;
1214
parser->m_elementDeclHandler = NULL;
1215
parser->m_attlistDeclHandler = NULL;
1216
parser->m_entityDeclHandler = NULL;
1217
parser->m_xmlDeclHandler = NULL;
1218
parser->m_bufferPtr = parser->m_buffer;
1219
parser->m_bufferEnd = parser->m_buffer;
1220
parser->m_parseEndByteIndex = 0;
1221
parser->m_parseEndPtr = NULL;
1222
parser->m_partialTokenBytesBefore = 0;
1223
parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1224
parser->m_lastBufferRequestSize = 0;
1225
parser->m_declElementType = NULL;
1226
parser->m_declAttributeId = NULL;
1227
parser->m_declEntity = NULL;
1228
parser->m_doctypeName = NULL;
1229
parser->m_doctypeSysid = NULL;
1230
parser->m_doctypePubid = NULL;
1231
parser->m_declAttributeType = NULL;
1232
parser->m_declNotationName = NULL;
1233
parser->m_declNotationPublicId = NULL;
1234
parser->m_declAttributeIsCdata = XML_FALSE;
1235
parser->m_declAttributeIsId = XML_FALSE;
1236
memset(&parser->m_position, 0, sizeof(POSITION));
1237
parser->m_errorCode = XML_ERROR_NONE;
1238
parser->m_eventPtr = NULL;
1239
parser->m_eventEndPtr = NULL;
1240
parser->m_positionPtr = NULL;
1241
parser->m_openInternalEntities = NULL;
1242
parser->m_defaultExpandInternalEntities = XML_TRUE;
1243
parser->m_tagLevel = 0;
1244
parser->m_tagStack = NULL;
1245
parser->m_inheritedBindings = NULL;
1246
parser->m_nSpecifiedAtts = 0;
1247
parser->m_unknownEncodingMem = NULL;
1248
parser->m_unknownEncodingRelease = NULL;
1249
parser->m_unknownEncodingData = NULL;
1250
parser->m_parentParser = NULL;
1251
parser->m_parsingStatus.parsing = XML_INITIALIZED;
1252
#ifdef XML_DTD
1253
parser->m_isParamEntity = XML_FALSE;
1254
parser->m_useForeignDTD = XML_FALSE;
1255
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1256
#endif
1257
parser->m_hash_secret_salt = 0;
1258
1259
#if XML_GE == 1
1260
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1261
parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1262
parser->m_accounting.maximumAmplificationFactor
1263
= EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1264
parser->m_accounting.activationThresholdBytes
1265
= EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1266
1267
memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1268
parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1269
#endif
1270
}
1271
1272
/* moves list of bindings to m_freeBindingList */
1273
static void FASTCALL
1274
moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1275
while (bindings) {
1276
BINDING *b = bindings;
1277
bindings = bindings->nextTagBinding;
1278
b->nextTagBinding = parser->m_freeBindingList;
1279
parser->m_freeBindingList = b;
1280
}
1281
}
1282
1283
XML_Bool XMLCALL
1284
XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1285
TAG *tStk;
1286
OPEN_INTERNAL_ENTITY *openEntityList;
1287
1288
if (parser == NULL)
1289
return XML_FALSE;
1290
1291
if (parser->m_parentParser)
1292
return XML_FALSE;
1293
/* move m_tagStack to m_freeTagList */
1294
tStk = parser->m_tagStack;
1295
while (tStk) {
1296
TAG *tag = tStk;
1297
tStk = tStk->parent;
1298
tag->parent = parser->m_freeTagList;
1299
moveToFreeBindingList(parser, tag->bindings);
1300
tag->bindings = NULL;
1301
parser->m_freeTagList = tag;
1302
}
1303
/* move m_openInternalEntities to m_freeInternalEntities */
1304
openEntityList = parser->m_openInternalEntities;
1305
while (openEntityList) {
1306
OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1307
openEntityList = openEntity->next;
1308
openEntity->next = parser->m_freeInternalEntities;
1309
parser->m_freeInternalEntities = openEntity;
1310
}
1311
moveToFreeBindingList(parser, parser->m_inheritedBindings);
1312
FREE(parser, parser->m_unknownEncodingMem);
1313
if (parser->m_unknownEncodingRelease)
1314
parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1315
poolClear(&parser->m_tempPool);
1316
poolClear(&parser->m_temp2Pool);
1317
FREE(parser, (void *)parser->m_protocolEncodingName);
1318
parser->m_protocolEncodingName = NULL;
1319
parserInit(parser, encodingName);
1320
dtdReset(parser->m_dtd, &parser->m_mem);
1321
return XML_TRUE;
1322
}
1323
1324
enum XML_Status XMLCALL
1325
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1326
if (parser == NULL)
1327
return XML_STATUS_ERROR;
1328
/* Block after XML_Parse()/XML_ParseBuffer() has been called.
1329
XXX There's no way for the caller to determine which of the
1330
XXX possible error cases caused the XML_STATUS_ERROR return.
1331
*/
1332
if (parser->m_parsingStatus.parsing == XML_PARSING
1333
|| parser->m_parsingStatus.parsing == XML_SUSPENDED)
1334
return XML_STATUS_ERROR;
1335
1336
/* Get rid of any previous encoding name */
1337
FREE(parser, (void *)parser->m_protocolEncodingName);
1338
1339
if (encodingName == NULL)
1340
/* No new encoding name */
1341
parser->m_protocolEncodingName = NULL;
1342
else {
1343
/* Copy the new encoding name into allocated memory */
1344
parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1345
if (! parser->m_protocolEncodingName)
1346
return XML_STATUS_ERROR;
1347
}
1348
return XML_STATUS_OK;
1349
}
1350
1351
XML_Parser XMLCALL
1352
XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1353
const XML_Char *encodingName) {
1354
XML_Parser parser = oldParser;
1355
DTD *newDtd = NULL;
1356
DTD *oldDtd;
1357
XML_StartElementHandler oldStartElementHandler;
1358
XML_EndElementHandler oldEndElementHandler;
1359
XML_CharacterDataHandler oldCharacterDataHandler;
1360
XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1361
XML_CommentHandler oldCommentHandler;
1362
XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1363
XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1364
XML_DefaultHandler oldDefaultHandler;
1365
XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1366
XML_NotationDeclHandler oldNotationDeclHandler;
1367
XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1368
XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1369
XML_NotStandaloneHandler oldNotStandaloneHandler;
1370
XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1371
XML_SkippedEntityHandler oldSkippedEntityHandler;
1372
XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1373
XML_ElementDeclHandler oldElementDeclHandler;
1374
XML_AttlistDeclHandler oldAttlistDeclHandler;
1375
XML_EntityDeclHandler oldEntityDeclHandler;
1376
XML_XmlDeclHandler oldXmlDeclHandler;
1377
ELEMENT_TYPE *oldDeclElementType;
1378
1379
void *oldUserData;
1380
void *oldHandlerArg;
1381
XML_Bool oldDefaultExpandInternalEntities;
1382
XML_Parser oldExternalEntityRefHandlerArg;
1383
#ifdef XML_DTD
1384
enum XML_ParamEntityParsing oldParamEntityParsing;
1385
int oldInEntityValue;
1386
#endif
1387
XML_Bool oldns_triplets;
1388
/* Note that the new parser shares the same hash secret as the old
1389
parser, so that dtdCopy and copyEntityTable can lookup values
1390
from hash tables associated with either parser without us having
1391
to worry which hash secrets each table has.
1392
*/
1393
unsigned long oldhash_secret_salt;
1394
XML_Bool oldReparseDeferralEnabled;
1395
1396
/* Validate the oldParser parameter before we pull everything out of it */
1397
if (oldParser == NULL)
1398
return NULL;
1399
1400
/* Stash the original parser contents on the stack */
1401
oldDtd = parser->m_dtd;
1402
oldStartElementHandler = parser->m_startElementHandler;
1403
oldEndElementHandler = parser->m_endElementHandler;
1404
oldCharacterDataHandler = parser->m_characterDataHandler;
1405
oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1406
oldCommentHandler = parser->m_commentHandler;
1407
oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1408
oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1409
oldDefaultHandler = parser->m_defaultHandler;
1410
oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1411
oldNotationDeclHandler = parser->m_notationDeclHandler;
1412
oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1413
oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1414
oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1415
oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1416
oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1417
oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1418
oldElementDeclHandler = parser->m_elementDeclHandler;
1419
oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1420
oldEntityDeclHandler = parser->m_entityDeclHandler;
1421
oldXmlDeclHandler = parser->m_xmlDeclHandler;
1422
oldDeclElementType = parser->m_declElementType;
1423
1424
oldUserData = parser->m_userData;
1425
oldHandlerArg = parser->m_handlerArg;
1426
oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1427
oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1428
#ifdef XML_DTD
1429
oldParamEntityParsing = parser->m_paramEntityParsing;
1430
oldInEntityValue = parser->m_prologState.inEntityValue;
1431
#endif
1432
oldns_triplets = parser->m_ns_triplets;
1433
/* Note that the new parser shares the same hash secret as the old
1434
parser, so that dtdCopy and copyEntityTable can lookup values
1435
from hash tables associated with either parser without us having
1436
to worry which hash secrets each table has.
1437
*/
1438
oldhash_secret_salt = parser->m_hash_secret_salt;
1439
oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1440
1441
#ifdef XML_DTD
1442
if (! context)
1443
newDtd = oldDtd;
1444
#endif /* XML_DTD */
1445
1446
/* Note that the magical uses of the pre-processor to make field
1447
access look more like C++ require that `parser' be overwritten
1448
here. This makes this function more painful to follow than it
1449
would be otherwise.
1450
*/
1451
if (parser->m_ns) {
1452
XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1453
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1454
} else {
1455
parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1456
}
1457
1458
if (! parser)
1459
return NULL;
1460
1461
parser->m_startElementHandler = oldStartElementHandler;
1462
parser->m_endElementHandler = oldEndElementHandler;
1463
parser->m_characterDataHandler = oldCharacterDataHandler;
1464
parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1465
parser->m_commentHandler = oldCommentHandler;
1466
parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1467
parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1468
parser->m_defaultHandler = oldDefaultHandler;
1469
parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1470
parser->m_notationDeclHandler = oldNotationDeclHandler;
1471
parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1472
parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1473
parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1474
parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1475
parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1476
parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1477
parser->m_elementDeclHandler = oldElementDeclHandler;
1478
parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1479
parser->m_entityDeclHandler = oldEntityDeclHandler;
1480
parser->m_xmlDeclHandler = oldXmlDeclHandler;
1481
parser->m_declElementType = oldDeclElementType;
1482
parser->m_userData = oldUserData;
1483
if (oldUserData == oldHandlerArg)
1484
parser->m_handlerArg = parser->m_userData;
1485
else
1486
parser->m_handlerArg = parser;
1487
if (oldExternalEntityRefHandlerArg != oldParser)
1488
parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1489
parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1490
parser->m_ns_triplets = oldns_triplets;
1491
parser->m_hash_secret_salt = oldhash_secret_salt;
1492
parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1493
parser->m_parentParser = oldParser;
1494
#ifdef XML_DTD
1495
parser->m_paramEntityParsing = oldParamEntityParsing;
1496
parser->m_prologState.inEntityValue = oldInEntityValue;
1497
if (context) {
1498
#endif /* XML_DTD */
1499
if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1500
|| ! setContext(parser, context)) {
1501
XML_ParserFree(parser);
1502
return NULL;
1503
}
1504
parser->m_processor = externalEntityInitProcessor;
1505
#ifdef XML_DTD
1506
} else {
1507
/* The DTD instance referenced by parser->m_dtd is shared between the
1508
document's root parser and external PE parsers, therefore one does not
1509
need to call setContext. In addition, one also *must* not call
1510
setContext, because this would overwrite existing prefix->binding
1511
pointers in parser->m_dtd with ones that get destroyed with the external
1512
PE parser. This would leave those prefixes with dangling pointers.
1513
*/
1514
parser->m_isParamEntity = XML_TRUE;
1515
XmlPrologStateInitExternalEntity(&parser->m_prologState);
1516
parser->m_processor = externalParEntInitProcessor;
1517
}
1518
#endif /* XML_DTD */
1519
return parser;
1520
}
1521
1522
static void FASTCALL
1523
destroyBindings(BINDING *bindings, XML_Parser parser) {
1524
for (;;) {
1525
BINDING *b = bindings;
1526
if (! b)
1527
break;
1528
bindings = b->nextTagBinding;
1529
FREE(parser, b->uri);
1530
FREE(parser, b);
1531
}
1532
}
1533
1534
void XMLCALL
1535
XML_ParserFree(XML_Parser parser) {
1536
TAG *tagList;
1537
OPEN_INTERNAL_ENTITY *entityList;
1538
if (parser == NULL)
1539
return;
1540
/* free m_tagStack and m_freeTagList */
1541
tagList = parser->m_tagStack;
1542
for (;;) {
1543
TAG *p;
1544
if (tagList == NULL) {
1545
if (parser->m_freeTagList == NULL)
1546
break;
1547
tagList = parser->m_freeTagList;
1548
parser->m_freeTagList = NULL;
1549
}
1550
p = tagList;
1551
tagList = tagList->parent;
1552
FREE(parser, p->buf);
1553
destroyBindings(p->bindings, parser);
1554
FREE(parser, p);
1555
}
1556
/* free m_openInternalEntities and m_freeInternalEntities */
1557
entityList = parser->m_openInternalEntities;
1558
for (;;) {
1559
OPEN_INTERNAL_ENTITY *openEntity;
1560
if (entityList == NULL) {
1561
if (parser->m_freeInternalEntities == NULL)
1562
break;
1563
entityList = parser->m_freeInternalEntities;
1564
parser->m_freeInternalEntities = NULL;
1565
}
1566
openEntity = entityList;
1567
entityList = entityList->next;
1568
FREE(parser, openEntity);
1569
}
1570
1571
destroyBindings(parser->m_freeBindingList, parser);
1572
destroyBindings(parser->m_inheritedBindings, parser);
1573
poolDestroy(&parser->m_tempPool);
1574
poolDestroy(&parser->m_temp2Pool);
1575
FREE(parser, (void *)parser->m_protocolEncodingName);
1576
#ifdef XML_DTD
1577
/* external parameter entity parsers share the DTD structure
1578
parser->m_dtd with the root parser, so we must not destroy it
1579
*/
1580
if (! parser->m_isParamEntity && parser->m_dtd)
1581
#else
1582
if (parser->m_dtd)
1583
#endif /* XML_DTD */
1584
dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1585
&parser->m_mem);
1586
FREE(parser, (void *)parser->m_atts);
1587
#ifdef XML_ATTR_INFO
1588
FREE(parser, (void *)parser->m_attInfo);
1589
#endif
1590
FREE(parser, parser->m_groupConnector);
1591
FREE(parser, parser->m_buffer);
1592
FREE(parser, parser->m_dataBuf);
1593
FREE(parser, parser->m_nsAtts);
1594
FREE(parser, parser->m_unknownEncodingMem);
1595
if (parser->m_unknownEncodingRelease)
1596
parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1597
FREE(parser, parser);
1598
}
1599
1600
void XMLCALL
1601
XML_UseParserAsHandlerArg(XML_Parser parser) {
1602
if (parser != NULL)
1603
parser->m_handlerArg = parser;
1604
}
1605
1606
enum XML_Error XMLCALL
1607
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1608
if (parser == NULL)
1609
return XML_ERROR_INVALID_ARGUMENT;
1610
#ifdef XML_DTD
1611
/* block after XML_Parse()/XML_ParseBuffer() has been called */
1612
if (parser->m_parsingStatus.parsing == XML_PARSING
1613
|| parser->m_parsingStatus.parsing == XML_SUSPENDED)
1614
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1615
parser->m_useForeignDTD = useDTD;
1616
return XML_ERROR_NONE;
1617
#else
1618
UNUSED_P(useDTD);
1619
return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1620
#endif
1621
}
1622
1623
void XMLCALL
1624
XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1625
if (parser == NULL)
1626
return;
1627
/* block after XML_Parse()/XML_ParseBuffer() has been called */
1628
if (parser->m_parsingStatus.parsing == XML_PARSING
1629
|| parser->m_parsingStatus.parsing == XML_SUSPENDED)
1630
return;
1631
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1632
}
1633
1634
void XMLCALL
1635
XML_SetUserData(XML_Parser parser, void *p) {
1636
if (parser == NULL)
1637
return;
1638
if (parser->m_handlerArg == parser->m_userData)
1639
parser->m_handlerArg = parser->m_userData = p;
1640
else
1641
parser->m_userData = p;
1642
}
1643
1644
enum XML_Status XMLCALL
1645
XML_SetBase(XML_Parser parser, const XML_Char *p) {
1646
if (parser == NULL)
1647
return XML_STATUS_ERROR;
1648
if (p) {
1649
p = poolCopyString(&parser->m_dtd->pool, p);
1650
if (! p)
1651
return XML_STATUS_ERROR;
1652
parser->m_curBase = p;
1653
} else
1654
parser->m_curBase = NULL;
1655
return XML_STATUS_OK;
1656
}
1657
1658
const XML_Char *XMLCALL
1659
XML_GetBase(XML_Parser parser) {
1660
if (parser == NULL)
1661
return NULL;
1662
return parser->m_curBase;
1663
}
1664
1665
int XMLCALL
1666
XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1667
if (parser == NULL)
1668
return -1;
1669
return parser->m_nSpecifiedAtts;
1670
}
1671
1672
int XMLCALL
1673
XML_GetIdAttributeIndex(XML_Parser parser) {
1674
if (parser == NULL)
1675
return -1;
1676
return parser->m_idAttIndex;
1677
}
1678
1679
#ifdef XML_ATTR_INFO
1680
const XML_AttrInfo *XMLCALL
1681
XML_GetAttributeInfo(XML_Parser parser) {
1682
if (parser == NULL)
1683
return NULL;
1684
return parser->m_attInfo;
1685
}
1686
#endif
1687
1688
void XMLCALL
1689
XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1690
XML_EndElementHandler end) {
1691
if (parser == NULL)
1692
return;
1693
parser->m_startElementHandler = start;
1694
parser->m_endElementHandler = end;
1695
}
1696
1697
void XMLCALL
1698
XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1699
if (parser != NULL)
1700
parser->m_startElementHandler = start;
1701
}
1702
1703
void XMLCALL
1704
XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1705
if (parser != NULL)
1706
parser->m_endElementHandler = end;
1707
}
1708
1709
void XMLCALL
1710
XML_SetCharacterDataHandler(XML_Parser parser,
1711
XML_CharacterDataHandler handler) {
1712
if (parser != NULL)
1713
parser->m_characterDataHandler = handler;
1714
}
1715
1716
void XMLCALL
1717
XML_SetProcessingInstructionHandler(XML_Parser parser,
1718
XML_ProcessingInstructionHandler handler) {
1719
if (parser != NULL)
1720
parser->m_processingInstructionHandler = handler;
1721
}
1722
1723
void XMLCALL
1724
XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1725
if (parser != NULL)
1726
parser->m_commentHandler = handler;
1727
}
1728
1729
void XMLCALL
1730
XML_SetCdataSectionHandler(XML_Parser parser,
1731
XML_StartCdataSectionHandler start,
1732
XML_EndCdataSectionHandler end) {
1733
if (parser == NULL)
1734
return;
1735
parser->m_startCdataSectionHandler = start;
1736
parser->m_endCdataSectionHandler = end;
1737
}
1738
1739
void XMLCALL
1740
XML_SetStartCdataSectionHandler(XML_Parser parser,
1741
XML_StartCdataSectionHandler start) {
1742
if (parser != NULL)
1743
parser->m_startCdataSectionHandler = start;
1744
}
1745
1746
void XMLCALL
1747
XML_SetEndCdataSectionHandler(XML_Parser parser,
1748
XML_EndCdataSectionHandler end) {
1749
if (parser != NULL)
1750
parser->m_endCdataSectionHandler = end;
1751
}
1752
1753
void XMLCALL
1754
XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1755
if (parser == NULL)
1756
return;
1757
parser->m_defaultHandler = handler;
1758
parser->m_defaultExpandInternalEntities = XML_FALSE;
1759
}
1760
1761
void XMLCALL
1762
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1763
if (parser == NULL)
1764
return;
1765
parser->m_defaultHandler = handler;
1766
parser->m_defaultExpandInternalEntities = XML_TRUE;
1767
}
1768
1769
void XMLCALL
1770
XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1771
XML_EndDoctypeDeclHandler end) {
1772
if (parser == NULL)
1773
return;
1774
parser->m_startDoctypeDeclHandler = start;
1775
parser->m_endDoctypeDeclHandler = end;
1776
}
1777
1778
void XMLCALL
1779
XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1780
XML_StartDoctypeDeclHandler start) {
1781
if (parser != NULL)
1782
parser->m_startDoctypeDeclHandler = start;
1783
}
1784
1785
void XMLCALL
1786
XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1787
if (parser != NULL)
1788
parser->m_endDoctypeDeclHandler = end;
1789
}
1790
1791
void XMLCALL
1792
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1793
XML_UnparsedEntityDeclHandler handler) {
1794
if (parser != NULL)
1795
parser->m_unparsedEntityDeclHandler = handler;
1796
}
1797
1798
void XMLCALL
1799
XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1800
if (parser != NULL)
1801
parser->m_notationDeclHandler = handler;
1802
}
1803
1804
void XMLCALL
1805
XML_SetNamespaceDeclHandler(XML_Parser parser,
1806
XML_StartNamespaceDeclHandler start,
1807
XML_EndNamespaceDeclHandler end) {
1808
if (parser == NULL)
1809
return;
1810
parser->m_startNamespaceDeclHandler = start;
1811
parser->m_endNamespaceDeclHandler = end;
1812
}
1813
1814
void XMLCALL
1815
XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1816
XML_StartNamespaceDeclHandler start) {
1817
if (parser != NULL)
1818
parser->m_startNamespaceDeclHandler = start;
1819
}
1820
1821
void XMLCALL
1822
XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1823
XML_EndNamespaceDeclHandler end) {
1824
if (parser != NULL)
1825
parser->m_endNamespaceDeclHandler = end;
1826
}
1827
1828
void XMLCALL
1829
XML_SetNotStandaloneHandler(XML_Parser parser,
1830
XML_NotStandaloneHandler handler) {
1831
if (parser != NULL)
1832
parser->m_notStandaloneHandler = handler;
1833
}
1834
1835
void XMLCALL
1836
XML_SetExternalEntityRefHandler(XML_Parser parser,
1837
XML_ExternalEntityRefHandler handler) {
1838
if (parser != NULL)
1839
parser->m_externalEntityRefHandler = handler;
1840
}
1841
1842
void XMLCALL
1843
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1844
if (parser == NULL)
1845
return;
1846
if (arg)
1847
parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1848
else
1849
parser->m_externalEntityRefHandlerArg = parser;
1850
}
1851
1852
void XMLCALL
1853
XML_SetSkippedEntityHandler(XML_Parser parser,
1854
XML_SkippedEntityHandler handler) {
1855
if (parser != NULL)
1856
parser->m_skippedEntityHandler = handler;
1857
}
1858
1859
void XMLCALL
1860
XML_SetUnknownEncodingHandler(XML_Parser parser,
1861
XML_UnknownEncodingHandler handler, void *data) {
1862
if (parser == NULL)
1863
return;
1864
parser->m_unknownEncodingHandler = handler;
1865
parser->m_unknownEncodingHandlerData = data;
1866
}
1867
1868
void XMLCALL
1869
XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1870
if (parser != NULL)
1871
parser->m_elementDeclHandler = eldecl;
1872
}
1873
1874
void XMLCALL
1875
XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1876
if (parser != NULL)
1877
parser->m_attlistDeclHandler = attdecl;
1878
}
1879
1880
void XMLCALL
1881
XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1882
if (parser != NULL)
1883
parser->m_entityDeclHandler = handler;
1884
}
1885
1886
void XMLCALL
1887
XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1888
if (parser != NULL)
1889
parser->m_xmlDeclHandler = handler;
1890
}
1891
1892
int XMLCALL
1893
XML_SetParamEntityParsing(XML_Parser parser,
1894
enum XML_ParamEntityParsing peParsing) {
1895
if (parser == NULL)
1896
return 0;
1897
/* block after XML_Parse()/XML_ParseBuffer() has been called */
1898
if (parser->m_parsingStatus.parsing == XML_PARSING
1899
|| parser->m_parsingStatus.parsing == XML_SUSPENDED)
1900
return 0;
1901
#ifdef XML_DTD
1902
parser->m_paramEntityParsing = peParsing;
1903
return 1;
1904
#else
1905
return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1906
#endif
1907
}
1908
1909
int XMLCALL
1910
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1911
if (parser == NULL)
1912
return 0;
1913
if (parser->m_parentParser)
1914
return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1915
/* block after XML_Parse()/XML_ParseBuffer() has been called */
1916
if (parser->m_parsingStatus.parsing == XML_PARSING
1917
|| parser->m_parsingStatus.parsing == XML_SUSPENDED)
1918
return 0;
1919
parser->m_hash_secret_salt = hash_salt;
1920
return 1;
1921
}
1922
1923
enum XML_Status XMLCALL
1924
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1925
if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1926
if (parser != NULL)
1927
parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1928
return XML_STATUS_ERROR;
1929
}
1930
switch (parser->m_parsingStatus.parsing) {
1931
case XML_SUSPENDED:
1932
parser->m_errorCode = XML_ERROR_SUSPENDED;
1933
return XML_STATUS_ERROR;
1934
case XML_FINISHED:
1935
parser->m_errorCode = XML_ERROR_FINISHED;
1936
return XML_STATUS_ERROR;
1937
case XML_INITIALIZED:
1938
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1939
parser->m_errorCode = XML_ERROR_NO_MEMORY;
1940
return XML_STATUS_ERROR;
1941
}
1942
/* fall through */
1943
default:
1944
parser->m_parsingStatus.parsing = XML_PARSING;
1945
}
1946
1947
#if XML_CONTEXT_BYTES == 0
1948
if (parser->m_bufferPtr == parser->m_bufferEnd) {
1949
const char *end;
1950
int nLeftOver;
1951
enum XML_Status result;
1952
/* Detect overflow (a+b > MAX <==> b > MAX-a) */
1953
if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1954
parser->m_errorCode = XML_ERROR_NO_MEMORY;
1955
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1956
parser->m_processor = errorProcessor;
1957
return XML_STATUS_ERROR;
1958
}
1959
// though this isn't a buffer request, we assume that `len` is the app's
1960
// preferred buffer fill size, and therefore save it here.
1961
parser->m_lastBufferRequestSize = len;
1962
parser->m_parseEndByteIndex += len;
1963
parser->m_positionPtr = s;
1964
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1965
1966
parser->m_errorCode
1967
= callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1968
1969
if (parser->m_errorCode != XML_ERROR_NONE) {
1970
parser->m_eventEndPtr = parser->m_eventPtr;
1971
parser->m_processor = errorProcessor;
1972
return XML_STATUS_ERROR;
1973
} else {
1974
switch (parser->m_parsingStatus.parsing) {
1975
case XML_SUSPENDED:
1976
result = XML_STATUS_SUSPENDED;
1977
break;
1978
case XML_INITIALIZED:
1979
case XML_PARSING:
1980
if (isFinal) {
1981
parser->m_parsingStatus.parsing = XML_FINISHED;
1982
return XML_STATUS_OK;
1983
}
1984
/* fall through */
1985
default:
1986
result = XML_STATUS_OK;
1987
}
1988
}
1989
1990
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1991
&parser->m_position);
1992
nLeftOver = s + len - end;
1993
if (nLeftOver) {
1994
// Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1995
// (and XML_ERROR_FINISHED) from XML_GetBuffer.
1996
const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1997
parser->m_parsingStatus.parsing = XML_PARSING;
1998
void *const temp = XML_GetBuffer(parser, nLeftOver);
1999
parser->m_parsingStatus.parsing = originalStatus;
2000
// GetBuffer may have overwritten this, but we want to remember what the
2001
// app requested, not how many bytes were left over after parsing.
2002
parser->m_lastBufferRequestSize = len;
2003
if (temp == NULL) {
2004
// NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2005
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2006
parser->m_processor = errorProcessor;
2007
return XML_STATUS_ERROR;
2008
}
2009
// Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2010
// don't have any data to preserve, and can copy straight into the start
2011
// of the buffer rather than the GetBuffer return pointer (which may be
2012
// pointing further into the allocated buffer).
2013
memcpy(parser->m_buffer, end, nLeftOver);
2014
}
2015
parser->m_bufferPtr = parser->m_buffer;
2016
parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2017
parser->m_positionPtr = parser->m_bufferPtr;
2018
parser->m_parseEndPtr = parser->m_bufferEnd;
2019
parser->m_eventPtr = parser->m_bufferPtr;
2020
parser->m_eventEndPtr = parser->m_bufferPtr;
2021
return result;
2022
}
2023
#endif /* XML_CONTEXT_BYTES == 0 */
2024
void *buff = XML_GetBuffer(parser, len);
2025
if (buff == NULL)
2026
return XML_STATUS_ERROR;
2027
if (len > 0) {
2028
assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2029
memcpy(buff, s, len);
2030
}
2031
return XML_ParseBuffer(parser, len, isFinal);
2032
}
2033
2034
enum XML_Status XMLCALL
2035
XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2036
const char *start;
2037
enum XML_Status result = XML_STATUS_OK;
2038
2039
if (parser == NULL)
2040
return XML_STATUS_ERROR;
2041
switch (parser->m_parsingStatus.parsing) {
2042
case XML_SUSPENDED:
2043
parser->m_errorCode = XML_ERROR_SUSPENDED;
2044
return XML_STATUS_ERROR;
2045
case XML_FINISHED:
2046
parser->m_errorCode = XML_ERROR_FINISHED;
2047
return XML_STATUS_ERROR;
2048
case XML_INITIALIZED:
2049
/* Has someone called XML_GetBuffer successfully before? */
2050
if (! parser->m_bufferPtr) {
2051
parser->m_errorCode = XML_ERROR_NO_BUFFER;
2052
return XML_STATUS_ERROR;
2053
}
2054
2055
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2056
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2057
return XML_STATUS_ERROR;
2058
}
2059
/* fall through */
2060
default:
2061
parser->m_parsingStatus.parsing = XML_PARSING;
2062
}
2063
2064
start = parser->m_bufferPtr;
2065
parser->m_positionPtr = start;
2066
parser->m_bufferEnd += len;
2067
parser->m_parseEndPtr = parser->m_bufferEnd;
2068
parser->m_parseEndByteIndex += len;
2069
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2070
2071
parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2072
&parser->m_bufferPtr);
2073
2074
if (parser->m_errorCode != XML_ERROR_NONE) {
2075
parser->m_eventEndPtr = parser->m_eventPtr;
2076
parser->m_processor = errorProcessor;
2077
return XML_STATUS_ERROR;
2078
} else {
2079
switch (parser->m_parsingStatus.parsing) {
2080
case XML_SUSPENDED:
2081
result = XML_STATUS_SUSPENDED;
2082
break;
2083
case XML_INITIALIZED:
2084
case XML_PARSING:
2085
if (isFinal) {
2086
parser->m_parsingStatus.parsing = XML_FINISHED;
2087
return result;
2088
}
2089
default:; /* should not happen */
2090
}
2091
}
2092
2093
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2094
parser->m_bufferPtr, &parser->m_position);
2095
parser->m_positionPtr = parser->m_bufferPtr;
2096
return result;
2097
}
2098
2099
void *XMLCALL
2100
XML_GetBuffer(XML_Parser parser, int len) {
2101
if (parser == NULL)
2102
return NULL;
2103
if (len < 0) {
2104
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2105
return NULL;
2106
}
2107
switch (parser->m_parsingStatus.parsing) {
2108
case XML_SUSPENDED:
2109
parser->m_errorCode = XML_ERROR_SUSPENDED;
2110
return NULL;
2111
case XML_FINISHED:
2112
parser->m_errorCode = XML_ERROR_FINISHED;
2113
return NULL;
2114
default:;
2115
}
2116
2117
// whether or not the request succeeds, `len` seems to be the app's preferred
2118
// buffer fill size; remember it.
2119
parser->m_lastBufferRequestSize = len;
2120
if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2121
|| parser->m_buffer == NULL) {
2122
#if XML_CONTEXT_BYTES > 0
2123
int keep;
2124
#endif /* XML_CONTEXT_BYTES > 0 */
2125
/* Do not invoke signed arithmetic overflow: */
2126
int neededSize = (int)((unsigned)len
2127
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
2128
parser->m_bufferEnd, parser->m_bufferPtr));
2129
if (neededSize < 0) {
2130
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2131
return NULL;
2132
}
2133
#if XML_CONTEXT_BYTES > 0
2134
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2135
if (keep > XML_CONTEXT_BYTES)
2136
keep = XML_CONTEXT_BYTES;
2137
/* Detect and prevent integer overflow */
2138
if (keep > INT_MAX - neededSize) {
2139
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2140
return NULL;
2141
}
2142
neededSize += keep;
2143
#endif /* XML_CONTEXT_BYTES > 0 */
2144
if (parser->m_buffer && parser->m_bufferPtr
2145
&& neededSize
2146
<= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2147
#if XML_CONTEXT_BYTES > 0
2148
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2149
int offset
2150
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2151
- keep;
2152
/* The buffer pointers cannot be NULL here; we have at least some bytes
2153
* in the buffer */
2154
memmove(parser->m_buffer, &parser->m_buffer[offset],
2155
parser->m_bufferEnd - parser->m_bufferPtr + keep);
2156
parser->m_bufferEnd -= offset;
2157
parser->m_bufferPtr -= offset;
2158
}
2159
#else
2160
memmove(parser->m_buffer, parser->m_bufferPtr,
2161
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2162
parser->m_bufferEnd
2163
= parser->m_buffer
2164
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2165
parser->m_bufferPtr = parser->m_buffer;
2166
#endif /* XML_CONTEXT_BYTES > 0 */
2167
} else {
2168
char *newBuf;
2169
int bufferSize
2170
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2171
if (bufferSize == 0)
2172
bufferSize = INIT_BUFFER_SIZE;
2173
do {
2174
/* Do not invoke signed arithmetic overflow: */
2175
bufferSize = (int)(2U * (unsigned)bufferSize);
2176
} while (bufferSize < neededSize && bufferSize > 0);
2177
if (bufferSize <= 0) {
2178
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2179
return NULL;
2180
}
2181
newBuf = (char *)MALLOC(parser, bufferSize);
2182
if (newBuf == 0) {
2183
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2184
return NULL;
2185
}
2186
parser->m_bufferLim = newBuf + bufferSize;
2187
#if XML_CONTEXT_BYTES > 0
2188
if (parser->m_bufferPtr) {
2189
memcpy(newBuf, &parser->m_bufferPtr[-keep],
2190
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2191
+ keep);
2192
FREE(parser, parser->m_buffer);
2193
parser->m_buffer = newBuf;
2194
parser->m_bufferEnd
2195
= parser->m_buffer
2196
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2197
+ keep;
2198
parser->m_bufferPtr = parser->m_buffer + keep;
2199
} else {
2200
/* This must be a brand new buffer with no data in it yet */
2201
parser->m_bufferEnd = newBuf;
2202
parser->m_bufferPtr = parser->m_buffer = newBuf;
2203
}
2204
#else
2205
if (parser->m_bufferPtr) {
2206
memcpy(newBuf, parser->m_bufferPtr,
2207
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2208
FREE(parser, parser->m_buffer);
2209
parser->m_bufferEnd
2210
= newBuf
2211
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2212
} else {
2213
/* This must be a brand new buffer with no data in it yet */
2214
parser->m_bufferEnd = newBuf;
2215
}
2216
parser->m_bufferPtr = parser->m_buffer = newBuf;
2217
#endif /* XML_CONTEXT_BYTES > 0 */
2218
}
2219
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2220
parser->m_positionPtr = NULL;
2221
}
2222
return parser->m_bufferEnd;
2223
}
2224
2225
enum XML_Status XMLCALL
2226
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2227
if (parser == NULL)
2228
return XML_STATUS_ERROR;
2229
switch (parser->m_parsingStatus.parsing) {
2230
case XML_SUSPENDED:
2231
if (resumable) {
2232
parser->m_errorCode = XML_ERROR_SUSPENDED;
2233
return XML_STATUS_ERROR;
2234
}
2235
parser->m_parsingStatus.parsing = XML_FINISHED;
2236
break;
2237
case XML_FINISHED:
2238
parser->m_errorCode = XML_ERROR_FINISHED;
2239
return XML_STATUS_ERROR;
2240
default:
2241
if (resumable) {
2242
#ifdef XML_DTD
2243
if (parser->m_isParamEntity) {
2244
parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2245
return XML_STATUS_ERROR;
2246
}
2247
#endif
2248
parser->m_parsingStatus.parsing = XML_SUSPENDED;
2249
} else
2250
parser->m_parsingStatus.parsing = XML_FINISHED;
2251
}
2252
return XML_STATUS_OK;
2253
}
2254
2255
enum XML_Status XMLCALL
2256
XML_ResumeParser(XML_Parser parser) {
2257
enum XML_Status result = XML_STATUS_OK;
2258
2259
if (parser == NULL)
2260
return XML_STATUS_ERROR;
2261
if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2262
parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2263
return XML_STATUS_ERROR;
2264
}
2265
parser->m_parsingStatus.parsing = XML_PARSING;
2266
2267
parser->m_errorCode = callProcessor(
2268
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2269
2270
if (parser->m_errorCode != XML_ERROR_NONE) {
2271
parser->m_eventEndPtr = parser->m_eventPtr;
2272
parser->m_processor = errorProcessor;
2273
return XML_STATUS_ERROR;
2274
} else {
2275
switch (parser->m_parsingStatus.parsing) {
2276
case XML_SUSPENDED:
2277
result = XML_STATUS_SUSPENDED;
2278
break;
2279
case XML_INITIALIZED:
2280
case XML_PARSING:
2281
if (parser->m_parsingStatus.finalBuffer) {
2282
parser->m_parsingStatus.parsing = XML_FINISHED;
2283
return result;
2284
}
2285
default:;
2286
}
2287
}
2288
2289
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2290
parser->m_bufferPtr, &parser->m_position);
2291
parser->m_positionPtr = parser->m_bufferPtr;
2292
return result;
2293
}
2294
2295
void XMLCALL
2296
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2297
if (parser == NULL)
2298
return;
2299
assert(status != NULL);
2300
*status = parser->m_parsingStatus;
2301
}
2302
2303
enum XML_Error XMLCALL
2304
XML_GetErrorCode(XML_Parser parser) {
2305
if (parser == NULL)
2306
return XML_ERROR_INVALID_ARGUMENT;
2307
return parser->m_errorCode;
2308
}
2309
2310
XML_Index XMLCALL
2311
XML_GetCurrentByteIndex(XML_Parser parser) {
2312
if (parser == NULL)
2313
return -1;
2314
if (parser->m_eventPtr)
2315
return (XML_Index)(parser->m_parseEndByteIndex
2316
- (parser->m_parseEndPtr - parser->m_eventPtr));
2317
return -1;
2318
}
2319
2320
int XMLCALL
2321
XML_GetCurrentByteCount(XML_Parser parser) {
2322
if (parser == NULL)
2323
return 0;
2324
if (parser->m_eventEndPtr && parser->m_eventPtr)
2325
return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2326
return 0;
2327
}
2328
2329
const char *XMLCALL
2330
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2331
#if XML_CONTEXT_BYTES > 0
2332
if (parser == NULL)
2333
return NULL;
2334
if (parser->m_eventPtr && parser->m_buffer) {
2335
if (offset != NULL)
2336
*offset = (int)(parser->m_eventPtr - parser->m_buffer);
2337
if (size != NULL)
2338
*size = (int)(parser->m_bufferEnd - parser->m_buffer);
2339
return parser->m_buffer;
2340
}
2341
#else
2342
(void)parser;
2343
(void)offset;
2344
(void)size;
2345
#endif /* XML_CONTEXT_BYTES > 0 */
2346
return (const char *)0;
2347
}
2348
2349
XML_Size XMLCALL
2350
XML_GetCurrentLineNumber(XML_Parser parser) {
2351
if (parser == NULL)
2352
return 0;
2353
if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2354
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2355
parser->m_eventPtr, &parser->m_position);
2356
parser->m_positionPtr = parser->m_eventPtr;
2357
}
2358
return parser->m_position.lineNumber + 1;
2359
}
2360
2361
XML_Size XMLCALL
2362
XML_GetCurrentColumnNumber(XML_Parser parser) {
2363
if (parser == NULL)
2364
return 0;
2365
if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2366
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2367
parser->m_eventPtr, &parser->m_position);
2368
parser->m_positionPtr = parser->m_eventPtr;
2369
}
2370
return parser->m_position.columnNumber;
2371
}
2372
2373
void XMLCALL
2374
XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2375
if (parser != NULL)
2376
FREE(parser, model);
2377
}
2378
2379
void *XMLCALL
2380
XML_MemMalloc(XML_Parser parser, size_t size) {
2381
if (parser == NULL)
2382
return NULL;
2383
return MALLOC(parser, size);
2384
}
2385
2386
void *XMLCALL
2387
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2388
if (parser == NULL)
2389
return NULL;
2390
return REALLOC(parser, ptr, size);
2391
}
2392
2393
void XMLCALL
2394
XML_MemFree(XML_Parser parser, void *ptr) {
2395
if (parser != NULL)
2396
FREE(parser, ptr);
2397
}
2398
2399
void XMLCALL
2400
XML_DefaultCurrent(XML_Parser parser) {
2401
if (parser == NULL)
2402
return;
2403
if (parser->m_defaultHandler) {
2404
if (parser->m_openInternalEntities)
2405
reportDefault(parser, parser->m_internalEncoding,
2406
parser->m_openInternalEntities->internalEventPtr,
2407
parser->m_openInternalEntities->internalEventEndPtr);
2408
else
2409
reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2410
parser->m_eventEndPtr);
2411
}
2412
}
2413
2414
const XML_LChar *XMLCALL
2415
XML_ErrorString(enum XML_Error code) {
2416
switch (code) {
2417
case XML_ERROR_NONE:
2418
return NULL;
2419
case XML_ERROR_NO_MEMORY:
2420
return XML_L("out of memory");
2421
case XML_ERROR_SYNTAX:
2422
return XML_L("syntax error");
2423
case XML_ERROR_NO_ELEMENTS:
2424
return XML_L("no element found");
2425
case XML_ERROR_INVALID_TOKEN:
2426
return XML_L("not well-formed (invalid token)");
2427
case XML_ERROR_UNCLOSED_TOKEN:
2428
return XML_L("unclosed token");
2429
case XML_ERROR_PARTIAL_CHAR:
2430
return XML_L("partial character");
2431
case XML_ERROR_TAG_MISMATCH:
2432
return XML_L("mismatched tag");
2433
case XML_ERROR_DUPLICATE_ATTRIBUTE:
2434
return XML_L("duplicate attribute");
2435
case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2436
return XML_L("junk after document element");
2437
case XML_ERROR_PARAM_ENTITY_REF:
2438
return XML_L("illegal parameter entity reference");
2439
case XML_ERROR_UNDEFINED_ENTITY:
2440
return XML_L("undefined entity");
2441
case XML_ERROR_RECURSIVE_ENTITY_REF:
2442
return XML_L("recursive entity reference");
2443
case XML_ERROR_ASYNC_ENTITY:
2444
return XML_L("asynchronous entity");
2445
case XML_ERROR_BAD_CHAR_REF:
2446
return XML_L("reference to invalid character number");
2447
case XML_ERROR_BINARY_ENTITY_REF:
2448
return XML_L("reference to binary entity");
2449
case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2450
return XML_L("reference to external entity in attribute");
2451
case XML_ERROR_MISPLACED_XML_PI:
2452
return XML_L("XML or text declaration not at start of entity");
2453
case XML_ERROR_UNKNOWN_ENCODING:
2454
return XML_L("unknown encoding");
2455
case XML_ERROR_INCORRECT_ENCODING:
2456
return XML_L("encoding specified in XML declaration is incorrect");
2457
case XML_ERROR_UNCLOSED_CDATA_SECTION:
2458
return XML_L("unclosed CDATA section");
2459
case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2460
return XML_L("error in processing external entity reference");
2461
case XML_ERROR_NOT_STANDALONE:
2462
return XML_L("document is not standalone");
2463
case XML_ERROR_UNEXPECTED_STATE:
2464
return XML_L("unexpected parser state - please send a bug report");
2465
case XML_ERROR_ENTITY_DECLARED_IN_PE:
2466
return XML_L("entity declared in parameter entity");
2467
case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2468
return XML_L("requested feature requires XML_DTD support in Expat");
2469
case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2470
return XML_L("cannot change setting once parsing has begun");
2471
/* Added in 1.95.7. */
2472
case XML_ERROR_UNBOUND_PREFIX:
2473
return XML_L("unbound prefix");
2474
/* Added in 1.95.8. */
2475
case XML_ERROR_UNDECLARING_PREFIX:
2476
return XML_L("must not undeclare prefix");
2477
case XML_ERROR_INCOMPLETE_PE:
2478
return XML_L("incomplete markup in parameter entity");
2479
case XML_ERROR_XML_DECL:
2480
return XML_L("XML declaration not well-formed");
2481
case XML_ERROR_TEXT_DECL:
2482
return XML_L("text declaration not well-formed");
2483
case XML_ERROR_PUBLICID:
2484
return XML_L("illegal character(s) in public id");
2485
case XML_ERROR_SUSPENDED:
2486
return XML_L("parser suspended");
2487
case XML_ERROR_NOT_SUSPENDED:
2488
return XML_L("parser not suspended");
2489
case XML_ERROR_ABORTED:
2490
return XML_L("parsing aborted");
2491
case XML_ERROR_FINISHED:
2492
return XML_L("parsing finished");
2493
case XML_ERROR_SUSPEND_PE:
2494
return XML_L("cannot suspend in external parameter entity");
2495
/* Added in 2.0.0. */
2496
case XML_ERROR_RESERVED_PREFIX_XML:
2497
return XML_L(
2498
"reserved prefix (xml) must not be undeclared or bound to another namespace name");
2499
case XML_ERROR_RESERVED_PREFIX_XMLNS:
2500
return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2501
case XML_ERROR_RESERVED_NAMESPACE_URI:
2502
return XML_L(
2503
"prefix must not be bound to one of the reserved namespace names");
2504
/* Added in 2.2.5. */
2505
case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2506
return XML_L("invalid argument");
2507
/* Added in 2.3.0. */
2508
case XML_ERROR_NO_BUFFER:
2509
return XML_L(
2510
"a successful prior call to function XML_GetBuffer is required");
2511
/* Added in 2.4.0. */
2512
case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2513
return XML_L(
2514
"limit on input amplification factor (from DTD and entities) breached");
2515
}
2516
return NULL;
2517
}
2518
2519
const XML_LChar *XMLCALL
2520
XML_ExpatVersion(void) {
2521
/* V1 is used to string-ize the version number. However, it would
2522
string-ize the actual version macro *names* unless we get them
2523
substituted before being passed to V1. CPP is defined to expand
2524
a macro, then rescan for more expansions. Thus, we use V2 to expand
2525
the version macros, then CPP will expand the resulting V1() macro
2526
with the correct numerals. */
2527
/* ### I'm assuming cpp is portable in this respect... */
2528
2529
#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2530
#define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2531
2532
return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2533
2534
#undef V1
2535
#undef V2
2536
}
2537
2538
XML_Expat_Version XMLCALL
2539
XML_ExpatVersionInfo(void) {
2540
XML_Expat_Version version;
2541
2542
version.major = XML_MAJOR_VERSION;
2543
version.minor = XML_MINOR_VERSION;
2544
version.micro = XML_MICRO_VERSION;
2545
2546
return version;
2547
}
2548
2549
const XML_Feature *XMLCALL
2550
XML_GetFeatureList(void) {
2551
static const XML_Feature features[] = {
2552
{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2553
sizeof(XML_Char)},
2554
{XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2555
sizeof(XML_LChar)},
2556
#ifdef XML_UNICODE
2557
{XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2558
#endif
2559
#ifdef XML_UNICODE_WCHAR_T
2560
{XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2561
#endif
2562
#ifdef XML_DTD
2563
{XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2564
#endif
2565
#if XML_CONTEXT_BYTES > 0
2566
{XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2567
XML_CONTEXT_BYTES},
2568
#endif
2569
#ifdef XML_MIN_SIZE
2570
{XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2571
#endif
2572
#ifdef XML_NS
2573
{XML_FEATURE_NS, XML_L("XML_NS"), 0},
2574
#endif
2575
#ifdef XML_LARGE_SIZE
2576
{XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2577
#endif
2578
#ifdef XML_ATTR_INFO
2579
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2580
#endif
2581
#if XML_GE == 1
2582
/* Added in Expat 2.4.0 for XML_DTD defined and
2583
* added in Expat 2.6.0 for XML_GE == 1. */
2584
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2585
XML_L("XML_BLAP_MAX_AMP"),
2586
(long int)
2587
EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2588
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2589
XML_L("XML_BLAP_ACT_THRES"),
2590
EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2591
/* Added in Expat 2.6.0. */
2592
{XML_FEATURE_GE, XML_L("XML_GE"), 0},
2593
#endif
2594
{XML_FEATURE_END, NULL, 0}};
2595
2596
return features;
2597
}
2598
2599
#if XML_GE == 1
2600
XML_Bool XMLCALL
2601
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2602
XML_Parser parser, float maximumAmplificationFactor) {
2603
if ((parser == NULL) || (parser->m_parentParser != NULL)
2604
|| isnan(maximumAmplificationFactor)
2605
|| (maximumAmplificationFactor < 1.0f)) {
2606
return XML_FALSE;
2607
}
2608
parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2609
return XML_TRUE;
2610
}
2611
2612
XML_Bool XMLCALL
2613
XML_SetBillionLaughsAttackProtectionActivationThreshold(
2614
XML_Parser parser, unsigned long long activationThresholdBytes) {
2615
if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2616
return XML_FALSE;
2617
}
2618
parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2619
return XML_TRUE;
2620
}
2621
#endif /* XML_GE == 1 */
2622
2623
XML_Bool XMLCALL
2624
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2625
if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2626
parser->m_reparseDeferralEnabled = enabled;
2627
return XML_TRUE;
2628
}
2629
return XML_FALSE;
2630
}
2631
2632
/* Initially tag->rawName always points into the parse buffer;
2633
for those TAG instances opened while the current parse buffer was
2634
processed, and not yet closed, we need to store tag->rawName in a more
2635
permanent location, since the parse buffer is about to be discarded.
2636
*/
2637
static XML_Bool
2638
storeRawNames(XML_Parser parser) {
2639
TAG *tag = parser->m_tagStack;
2640
while (tag) {
2641
int bufSize;
2642
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2643
size_t rawNameLen;
2644
char *rawNameBuf = tag->buf + nameLen;
2645
/* Stop if already stored. Since m_tagStack is a stack, we can stop
2646
at the first entry that has already been copied; everything
2647
below it in the stack is already been accounted for in a
2648
previous call to this function.
2649
*/
2650
if (tag->rawName == rawNameBuf)
2651
break;
2652
/* For reuse purposes we need to ensure that the
2653
size of tag->buf is a multiple of sizeof(XML_Char).
2654
*/
2655
rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2656
/* Detect and prevent integer overflow. */
2657
if (rawNameLen > (size_t)INT_MAX - nameLen)
2658
return XML_FALSE;
2659
bufSize = nameLen + (int)rawNameLen;
2660
if (bufSize > tag->bufEnd - tag->buf) {
2661
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2662
if (temp == NULL)
2663
return XML_FALSE;
2664
/* if tag->name.str points to tag->buf (only when namespace
2665
processing is off) then we have to update it
2666
*/
2667
if (tag->name.str == (XML_Char *)tag->buf)
2668
tag->name.str = (XML_Char *)temp;
2669
/* if tag->name.localPart is set (when namespace processing is on)
2670
then update it as well, since it will always point into tag->buf
2671
*/
2672
if (tag->name.localPart)
2673
tag->name.localPart
2674
= (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2675
tag->buf = temp;
2676
tag->bufEnd = temp + bufSize;
2677
rawNameBuf = temp + nameLen;
2678
}
2679
memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2680
tag->rawName = rawNameBuf;
2681
tag = tag->parent;
2682
}
2683
return XML_TRUE;
2684
}
2685
2686
static enum XML_Error PTRCALL
2687
contentProcessor(XML_Parser parser, const char *start, const char *end,
2688
const char **endPtr) {
2689
enum XML_Error result = doContent(
2690
parser, 0, parser->m_encoding, start, end, endPtr,
2691
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2692
if (result == XML_ERROR_NONE) {
2693
if (! storeRawNames(parser))
2694
return XML_ERROR_NO_MEMORY;
2695
}
2696
return result;
2697
}
2698
2699
static enum XML_Error PTRCALL
2700
externalEntityInitProcessor(XML_Parser parser, const char *start,
2701
const char *end, const char **endPtr) {
2702
enum XML_Error result = initializeEncoding(parser);
2703
if (result != XML_ERROR_NONE)
2704
return result;
2705
parser->m_processor = externalEntityInitProcessor2;
2706
return externalEntityInitProcessor2(parser, start, end, endPtr);
2707
}
2708
2709
static enum XML_Error PTRCALL
2710
externalEntityInitProcessor2(XML_Parser parser, const char *start,
2711
const char *end, const char **endPtr) {
2712
const char *next = start; /* XmlContentTok doesn't always set the last arg */
2713
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2714
switch (tok) {
2715
case XML_TOK_BOM:
2716
#if XML_GE == 1
2717
if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2718
XML_ACCOUNT_DIRECT)) {
2719
accountingOnAbort(parser);
2720
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2721
}
2722
#endif /* XML_GE == 1 */
2723
2724
/* If we are at the end of the buffer, this would cause the next stage,
2725
i.e. externalEntityInitProcessor3, to pass control directly to
2726
doContent (by detecting XML_TOK_NONE) without processing any xml text
2727
declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2728
*/
2729
if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2730
*endPtr = next;
2731
return XML_ERROR_NONE;
2732
}
2733
start = next;
2734
break;
2735
case XML_TOK_PARTIAL:
2736
if (! parser->m_parsingStatus.finalBuffer) {
2737
*endPtr = start;
2738
return XML_ERROR_NONE;
2739
}
2740
parser->m_eventPtr = start;
2741
return XML_ERROR_UNCLOSED_TOKEN;
2742
case XML_TOK_PARTIAL_CHAR:
2743
if (! parser->m_parsingStatus.finalBuffer) {
2744
*endPtr = start;
2745
return XML_ERROR_NONE;
2746
}
2747
parser->m_eventPtr = start;
2748
return XML_ERROR_PARTIAL_CHAR;
2749
}
2750
parser->m_processor = externalEntityInitProcessor3;
2751
return externalEntityInitProcessor3(parser, start, end, endPtr);
2752
}
2753
2754
static enum XML_Error PTRCALL
2755
externalEntityInitProcessor3(XML_Parser parser, const char *start,
2756
const char *end, const char **endPtr) {
2757
int tok;
2758
const char *next = start; /* XmlContentTok doesn't always set the last arg */
2759
parser->m_eventPtr = start;
2760
tok = XmlContentTok(parser->m_encoding, start, end, &next);
2761
/* Note: These bytes are accounted later in:
2762
- processXmlDecl
2763
- externalEntityContentProcessor
2764
*/
2765
parser->m_eventEndPtr = next;
2766
2767
switch (tok) {
2768
case XML_TOK_XML_DECL: {
2769
enum XML_Error result;
2770
result = processXmlDecl(parser, 1, start, next);
2771
if (result != XML_ERROR_NONE)
2772
return result;
2773
switch (parser->m_parsingStatus.parsing) {
2774
case XML_SUSPENDED:
2775
*endPtr = next;
2776
return XML_ERROR_NONE;
2777
case XML_FINISHED:
2778
return XML_ERROR_ABORTED;
2779
default:
2780
start = next;
2781
}
2782
} break;
2783
case XML_TOK_PARTIAL:
2784
if (! parser->m_parsingStatus.finalBuffer) {
2785
*endPtr = start;
2786
return XML_ERROR_NONE;
2787
}
2788
return XML_ERROR_UNCLOSED_TOKEN;
2789
case XML_TOK_PARTIAL_CHAR:
2790
if (! parser->m_parsingStatus.finalBuffer) {
2791
*endPtr = start;
2792
return XML_ERROR_NONE;
2793
}
2794
return XML_ERROR_PARTIAL_CHAR;
2795
}
2796
parser->m_processor = externalEntityContentProcessor;
2797
parser->m_tagLevel = 1;
2798
return externalEntityContentProcessor(parser, start, end, endPtr);
2799
}
2800
2801
static enum XML_Error PTRCALL
2802
externalEntityContentProcessor(XML_Parser parser, const char *start,
2803
const char *end, const char **endPtr) {
2804
enum XML_Error result
2805
= doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2806
(XML_Bool)! parser->m_parsingStatus.finalBuffer,
2807
XML_ACCOUNT_ENTITY_EXPANSION);
2808
if (result == XML_ERROR_NONE) {
2809
if (! storeRawNames(parser))
2810
return XML_ERROR_NO_MEMORY;
2811
}
2812
return result;
2813
}
2814
2815
/* Tokenize the content in [s, end) with XmlContentTok() and dispatch on
   each token, calling the registered handlers (or the default handler).

   parser        - the parser whose state and handlers are used
   startTagLevel - tag level at which this run of content started; the
                   callers visible in this file pass 0 (contentProcessor)
                   or 1 (externalEntityContentProcessor)
   enc           - encoding used for tokenizing; it also selects which
                   pair of event pointers is updated (see below)
   s, end        - input range to process
   nextPtr       - on the XML_ERROR_NONE returns in this function that
                   stop before/at the end of input, receives the position
                   where processing stopped
   haveMore      - when true, a trailing incomplete token makes the
                   function return XML_ERROR_NONE with *nextPtr = s
                   instead of treating it as final input
   account       - accounting category forwarded to
                   accountingDiffTolerated(), storeAtts() and
                   doCdataSection()

   Returns XML_ERROR_NONE or the XML_Error code of the first problem
   encountered.
*/
static enum XML_Error
2816
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2817
const char *s, const char *end, const char **nextPtr,
2818
XML_Bool haveMore, enum XML_Account account) {
2819
/* save one level of indirection */
2820
DTD *const dtd = parser->m_dtd;
2821
2822
const char **eventPP;
2823
const char **eventEndPP;
2824
/* Use the parser's own event pointers when tokenizing in the parser's
   encoding; otherwise use the internal event pointers of the entry that
   m_openInternalEntities points to. */
if (enc == parser->m_encoding) {
2825
eventPP = &parser->m_eventPtr;
2826
eventEndPP = &parser->m_eventEndPtr;
2827
} else {
2828
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2829
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2830
}
2831
*eventPP = s;
2832
2833
for (;;) {
2834
const char *next = s; /* XmlContentTok doesn't always set the last arg */
2835
int tok = XmlContentTok(enc, s, end, &next);
2836
#if XML_GE == 1
2837
/* For a trailing ']' or CR token: account zero bytes when more input may
   follow (the token is handed back via *nextPtr = s below), otherwise
   account everything up to end.  All other tokens account up to next. */
const char *accountAfter
2838
= ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2839
? (haveMore ? s /* i.e. 0 bytes */ : end)
2840
: next;
2841
if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2842
account)) {
2843
accountingOnAbort(parser);
2844
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845
}
2846
#endif
2847
*eventEndPP = next;
2848
switch (tok) {
2849
case XML_TOK_TRAILING_CR:
2850
if (haveMore) {
2851
*nextPtr = s;
2852
return XML_ERROR_NONE;
2853
}
2854
*eventEndPP = end;
2855
/* No more input: the trailing CR is reported to the character data
   handler as a single 0xA character. */
if (parser->m_characterDataHandler) {
2856
XML_Char c = 0xA;
2857
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2858
} else if (parser->m_defaultHandler)
2859
reportDefault(parser, enc, s, end);
2860
/* We are at the end of the final buffer, should we check for
2861
XML_SUSPENDED, XML_FINISHED?
2862
*/
2863
if (startTagLevel == 0)
2864
return XML_ERROR_NO_ELEMENTS;
2865
if (parser->m_tagLevel != startTagLevel)
2866
return XML_ERROR_ASYNC_ENTITY;
2867
*nextPtr = end;
2868
return XML_ERROR_NONE;
2869
case XML_TOK_NONE:
2870
if (haveMore) {
2871
*nextPtr = s;
2872
return XML_ERROR_NONE;
2873
}
2874
if (startTagLevel > 0) {
2875
if (parser->m_tagLevel != startTagLevel)
2876
return XML_ERROR_ASYNC_ENTITY;
2877
*nextPtr = s;
2878
return XML_ERROR_NONE;
2879
}
2880
return XML_ERROR_NO_ELEMENTS;
2881
case XML_TOK_INVALID:
2882
*eventPP = next;
2883
return XML_ERROR_INVALID_TOKEN;
2884
case XML_TOK_PARTIAL:
2885
if (haveMore) {
2886
*nextPtr = s;
2887
return XML_ERROR_NONE;
2888
}
2889
return XML_ERROR_UNCLOSED_TOKEN;
2890
case XML_TOK_PARTIAL_CHAR:
2891
if (haveMore) {
2892
*nextPtr = s;
2893
return XML_ERROR_NONE;
2894
}
2895
return XML_ERROR_PARTIAL_CHAR;
2896
case XML_TOK_ENTITY_REF: {
2897
const XML_Char *name;
2898
ENTITY *entity;
2899
/* The name passed on excludes one minimum-width character at each end of
   the token.  A nonzero ch is reported directly as one character of
   character data. */
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2900
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2901
if (ch) {
2902
#if XML_GE == 1
2903
/* NOTE: We are replacing 4-6 characters original input for 1 character
2904
* so there is no amplification and hence recording without
2905
* protection. */
2906
accountingDiffTolerated(parser, tok, (char *)&ch,
2907
((char *)&ch) + sizeof(XML_Char), __LINE__,
2908
XML_ACCOUNT_ENTITY_EXPANSION);
2909
#endif /* XML_GE == 1 */
2910
if (parser->m_characterDataHandler)
2911
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2912
else if (parser->m_defaultHandler)
2913
reportDefault(parser, enc, s, next);
2914
break;
2915
}
2916
/* Store the entity name in the DTD pool only long enough to look it up in
   the general entities table, then discard it again. */
name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2917
next - enc->minBytesPerChar);
2918
if (! name)
2919
return XML_ERROR_NO_MEMORY;
2920
entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2921
poolDiscard(&dtd->pool);
2922
/* First, determine if a check for an existing declaration is needed;
2923
if yes, check that the entity exists, and that it is internal,
2924
otherwise call the skipped entity or default handler.
2925
*/
2926
if (! dtd->hasParamEntityRefs || dtd->standalone) {
2927
if (! entity)
2928
return XML_ERROR_UNDEFINED_ENTITY;
2929
else if (! entity->is_internal)
2930
return XML_ERROR_ENTITY_DECLARED_IN_PE;
2931
} else if (! entity) {
2932
if (parser->m_skippedEntityHandler)
2933
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2934
else if (parser->m_defaultHandler)
2935
reportDefault(parser, enc, s, next);
2936
break;
2937
}
2938
if (entity->open)
2939
return XML_ERROR_RECURSIVE_ENTITY_REF;
2940
if (entity->notation)
2941
return XML_ERROR_BINARY_ENTITY_REF;
2942
/* An entity with textPtr set is either expanded through
   processInternalEntity() or, when m_defaultExpandInternalEntities is
   off, reported as skipped.  Without textPtr the external entity
   reference handler (if any) is called. */
if (entity->textPtr) {
2943
enum XML_Error result;
2944
if (! parser->m_defaultExpandInternalEntities) {
2945
if (parser->m_skippedEntityHandler)
2946
parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2947
0);
2948
else if (parser->m_defaultHandler)
2949
reportDefault(parser, enc, s, next);
2950
break;
2951
}
2952
result = processInternalEntity(parser, entity, XML_FALSE);
2953
if (result != XML_ERROR_NONE)
2954
return result;
2955
} else if (parser->m_externalEntityRefHandler) {
2956
const XML_Char *context;
2957
/* The entity is flagged open only for the duration of getContext(). */
entity->open = XML_TRUE;
2958
context = getContext(parser);
2959
entity->open = XML_FALSE;
2960
if (! context)
2961
return XML_ERROR_NO_MEMORY;
2962
if (! parser->m_externalEntityRefHandler(
2963
parser->m_externalEntityRefHandlerArg, context, entity->base,
2964
entity->systemId, entity->publicId))
2965
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2966
poolDiscard(&parser->m_tempPool);
2967
} else if (parser->m_defaultHandler)
2968
reportDefault(parser, enc, s, next);
2969
break;
2970
}
2971
case XML_TOK_START_TAG_NO_ATTS:
2972
/* fall through */
2973
case XML_TOK_START_TAG_WITH_ATTS: {
2974
TAG *tag;
2975
enum XML_Error result;
2976
XML_Char *toPtr;
2977
/* Take a TAG from the free list if one is available; otherwise allocate a
   new TAG together with an initial name buffer. */
if (parser->m_freeTagList) {
2978
tag = parser->m_freeTagList;
2979
parser->m_freeTagList = parser->m_freeTagList->parent;
2980
} else {
2981
tag = (TAG *)MALLOC(parser, sizeof(TAG));
2982
if (! tag)
2983
return XML_ERROR_NO_MEMORY;
2984
tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2985
if (! tag->buf) {
2986
FREE(parser, tag);
2987
return XML_ERROR_NO_MEMORY;
2988
}
2989
tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2990
}
2991
/* Push the tag onto the tag stack; rawName points into the input buffer
   at this stage. */
tag->bindings = NULL;
2992
tag->parent = parser->m_tagStack;
2993
parser->m_tagStack = tag;
2994
tag->name.localPart = NULL;
2995
tag->name.prefix = NULL;
2996
tag->rawName = s + enc->minBytesPerChar;
2997
tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2998
++parser->m_tagLevel;
2999
{
3000
const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3001
const char *fromPtr = tag->rawName;
3002
toPtr = (XML_Char *)tag->buf;
3003
/* Convert the raw name into tag->buf, doubling the buffer each time the
   conversion stops before the end of the name.  The conversion limit is
   bufEnd - 1, which leaves room for the terminator written after the
   loop. */
for (;;) {
3004
int bufSize;
3005
int convLen;
3006
const enum XML_Convert_Result convert_res
3007
= XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3008
(ICHAR *)tag->bufEnd - 1);
3009
convLen = (int)(toPtr - (XML_Char *)tag->buf);
3010
if ((fromPtr >= rawNameEnd)
3011
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3012
tag->name.strLen = convLen;
3013
break;
3014
}
3015
bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3016
{
3017
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3018
if (temp == NULL)
3019
return XML_ERROR_NO_MEMORY;
3020
tag->buf = temp;
3021
tag->bufEnd = temp + bufSize;
3022
toPtr = (XML_Char *)temp + convLen;
3023
}
3024
}
3025
}
3026
tag->name.str = (XML_Char *)tag->buf;
3027
*toPtr = XML_T('\0');
3028
result
3029
= storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3030
if (result)
3031
return result;
3032
if (parser->m_startElementHandler)
3033
parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3034
(const XML_Char **)parser->m_atts);
3035
else if (parser->m_defaultHandler)
3036
reportDefault(parser, enc, s, next);
3037
poolClear(&parser->m_tempPool);
3038
break;
3039
}
3040
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3041
/* fall through */
3042
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3043
const char *rawName = s + enc->minBytesPerChar;
3044
enum XML_Error result;
3045
BINDING *bindings = NULL;
3046
XML_Bool noElmHandlers = XML_TRUE;
3047
TAG_NAME name;
3048
/* No TAG is pushed for an empty element; its name is kept in the
   temporary pool and its bindings in a local list. */
name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3049
rawName + XmlNameLength(enc, rawName));
3050
if (! name.str)
3051
return XML_ERROR_NO_MEMORY;
3052
poolFinish(&parser->m_tempPool);
3053
result = storeAtts(parser, enc, s, &name, &bindings,
3054
XML_ACCOUNT_NONE /* token spans whole start tag */);
3055
if (result != XML_ERROR_NONE) {
3056
freeBindings(parser, bindings);
3057
return result;
3058
}
3059
poolFinish(&parser->m_tempPool);
3060
if (parser->m_startElementHandler) {
3061
parser->m_startElementHandler(parser->m_handlerArg, name.str,
3062
(const XML_Char **)parser->m_atts);
3063
noElmHandlers = XML_FALSE;
3064
}
3065
if (parser->m_endElementHandler) {
3066
/* When both handlers are set, the event start is moved to the event end
   before the end element handler is called. */
if (parser->m_startElementHandler)
3067
*eventPP = *eventEndPP;
3068
parser->m_endElementHandler(parser->m_handlerArg, name.str);
3069
noElmHandlers = XML_FALSE;
3070
}
3071
if (noElmHandlers && parser->m_defaultHandler)
3072
reportDefault(parser, enc, s, next);
3073
poolClear(&parser->m_tempPool);
3074
freeBindings(parser, bindings);
3075
}
3076
/* At tag level 0 (and unless parsing is finished) hand over to
   epilogProcessor: install it if parsing is suspended, otherwise call it
   directly on the remaining input. */
if ((parser->m_tagLevel == 0)
3077
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3078
if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3079
parser->m_processor = epilogProcessor;
3080
else
3081
return epilogProcessor(parser, next, end, nextPtr);
3082
}
3083
break;
3084
case XML_TOK_END_TAG:
3085
if (parser->m_tagLevel == startTagLevel)
3086
return XML_ERROR_ASYNC_ENTITY;
3087
else {
3088
int len;
3089
const char *rawName;
3090
TAG *tag = parser->m_tagStack;
3091
/* Compare the end tag's raw name with the raw name recorded for the tag
   on top of the tag stack. */
rawName = s + enc->minBytesPerChar * 2;
3092
len = XmlNameLength(enc, rawName);
3093
if (len != tag->rawNameLength
3094
|| memcmp(tag->rawName, rawName, len) != 0) {
3095
*eventPP = rawName;
3096
return XML_ERROR_TAG_MISMATCH;
3097
}
3098
/* Pop the tag and put it on the free tag list for reuse. */
parser->m_tagStack = tag->parent;
3099
tag->parent = parser->m_freeTagList;
3100
parser->m_freeTagList = tag;
3101
--parser->m_tagLevel;
3102
if (parser->m_endElementHandler) {
3103
const XML_Char *localPart;
3104
const XML_Char *prefix;
3105
XML_Char *uri;
3106
localPart = tag->name.localPart;
3107
if (parser->m_ns && localPart) {
3108
/* localPart and prefix may have been overwritten in
3109
tag->name.str, since this points to the binding->uri
3110
buffer which gets reused; so we have to add them again
3111
*/
3112
uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3113
/* don't need to check for space - already done in storeAtts() */
3114
while (*localPart)
3115
*uri++ = *localPart++;
3116
prefix = tag->name.prefix;
3117
if (parser->m_ns_triplets && prefix) {
3118
*uri++ = parser->m_namespaceSeparator;
3119
while (*prefix)
3120
*uri++ = *prefix++;
3121
}
3122
*uri = XML_T('\0');
3123
}
3124
parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3125
} else if (parser->m_defaultHandler)
3126
reportDefault(parser, enc, s, next);
3127
/* Release the tag's bindings: notify the end-namespace handler, move each
   binding to the free binding list, and restore the prefix's previous
   binding. */
while (tag->bindings) {
3128
BINDING *b = tag->bindings;
3129
if (parser->m_endNamespaceDeclHandler)
3130
parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3131
b->prefix->name);
3132
tag->bindings = tag->bindings->nextTagBinding;
3133
b->nextTagBinding = parser->m_freeBindingList;
3134
parser->m_freeBindingList = b;
3135
b->prefix->binding = b->prevPrefixBinding;
3136
}
3137
if ((parser->m_tagLevel == 0)
3138
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3139
if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3140
parser->m_processor = epilogProcessor;
3141
else
3142
return epilogProcessor(parser, next, end, nextPtr);
3143
}
3144
}
3145
break;
3146
case XML_TOK_CHAR_REF: {
3147
int n = XmlCharRefNumber(enc, s);
3148
if (n < 0)
3149
return XML_ERROR_BAD_CHAR_REF;
3150
if (parser->m_characterDataHandler) {
3151
XML_Char buf[XML_ENCODE_MAX];
3152
parser->m_characterDataHandler(parser->m_handlerArg, buf,
3153
XmlEncode(n, (ICHAR *)buf));
3154
} else if (parser->m_defaultHandler)
3155
reportDefault(parser, enc, s, next);
3156
} break;
3157
case XML_TOK_XML_DECL:
3158
return XML_ERROR_MISPLACED_XML_PI;
3159
case XML_TOK_DATA_NEWLINE:
3160
if (parser->m_characterDataHandler) {
3161
XML_Char c = 0xA;
3162
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3163
} else if (parser->m_defaultHandler)
3164
reportDefault(parser, enc, s, next);
3165
break;
3166
case XML_TOK_CDATA_SECT_OPEN: {
3167
enum XML_Error result;
3168
if (parser->m_startCdataSectionHandler)
3169
parser->m_startCdataSectionHandler(parser->m_handlerArg);
3170
/* BEGIN disabled code */
3171
/* Suppose you doing a transformation on a document that involves
3172
changing only the character data. You set up a defaultHandler
3173
and a characterDataHandler. The defaultHandler simply copies
3174
characters through. The characterDataHandler does the
3175
transformation and writes the characters out escaping them as
3176
necessary. This case will fail to work if we leave out the
3177
following two lines (because & and < inside CDATA sections will
3178
be incorrectly escaped).
3179
3180
However, now we have a start/endCdataSectionHandler, so it seems
3181
easier to let the user deal with this.
3182
*/
3183
else if ((0) && parser->m_characterDataHandler)
3184
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3185
0);
3186
/* END disabled code */
3187
else if (parser->m_defaultHandler)
3188
reportDefault(parser, enc, s, next);
3189
/* doCdataSection() receives &next and may update it; if it succeeds but
   leaves next NULL, cdataSectionProcessor is installed and we return. */
result
3190
= doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3191
if (result != XML_ERROR_NONE)
3192
return result;
3193
else if (! next) {
3194
parser->m_processor = cdataSectionProcessor;
3195
return result;
3196
}
3197
} break;
3198
case XML_TOK_TRAILING_RSQB:
3199
if (haveMore) {
3200
*nextPtr = s;
3201
return XML_ERROR_NONE;
3202
}
3203
/* No more input: report everything from s to end as character data,
   converting it through m_dataBuf first when MUST_CONVERT says so. */
if (parser->m_characterDataHandler) {
3204
if (MUST_CONVERT(enc, s)) {
3205
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3206
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3207
parser->m_characterDataHandler(
3208
parser->m_handlerArg, parser->m_dataBuf,
3209
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3210
} else
3211
parser->m_characterDataHandler(
3212
parser->m_handlerArg, (const XML_Char *)s,
3213
(int)((const XML_Char *)end - (const XML_Char *)s));
3214
} else if (parser->m_defaultHandler)
3215
reportDefault(parser, enc, s, end);
3216
/* We are at the end of the final buffer, should we check for
3217
XML_SUSPENDED, XML_FINISHED?
3218
*/
3219
if (startTagLevel == 0) {
3220
*eventPP = end;
3221
return XML_ERROR_NO_ELEMENTS;
3222
}
3223
if (parser->m_tagLevel != startTagLevel) {
3224
*eventPP = end;
3225
return XML_ERROR_ASYNC_ENTITY;
3226
}
3227
*nextPtr = end;
3228
return XML_ERROR_NONE;
3229
case XML_TOK_DATA_CHARS: {
3230
XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3231
if (charDataHandler) {
3232
if (MUST_CONVERT(enc, s)) {
3233
/* Convert and deliver the data in pieces of at most the m_dataBuf
   capacity, moving the event pointers along with each piece, until the
   conversion reports completion or incomplete input. */
for (;;) {
3234
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3235
const enum XML_Convert_Result convert_res = XmlConvert(
3236
enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3237
*eventEndPP = s;
3238
charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3239
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3240
if ((convert_res == XML_CONVERT_COMPLETED)
3241
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3242
break;
3243
*eventPP = s;
3244
}
3245
} else
3246
charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3247
(int)((const XML_Char *)next - (const XML_Char *)s));
3248
} else if (parser->m_defaultHandler)
3249
reportDefault(parser, enc, s, next);
3250
} break;
3251
case XML_TOK_PI:
3252
if (! reportProcessingInstruction(parser, enc, s, next))
3253
return XML_ERROR_NO_MEMORY;
3254
break;
3255
case XML_TOK_COMMENT:
3256
if (! reportComment(parser, enc, s, next))
3257
return XML_ERROR_NO_MEMORY;
3258
break;
3259
default:
3260
/* All of the tokens produced by XmlContentTok() have their own
3261
* explicit cases, so this default is not strictly necessary.
3262
* However it is a useful safety net, so we retain the code and
3263
* simply exclude it from the coverage tests.
3264
*
3265
* LCOV_EXCL_START
3266
*/
3267
if (parser->m_defaultHandler)
3268
reportDefault(parser, enc, s, next);
3269
break;
3270
/* LCOV_EXCL_STOP */
3271
}
3272
/* Advance past the token just handled, then check whether parsing has
   been suspended or finished in the meantime. */
*eventPP = s = next;
3273
switch (parser->m_parsingStatus.parsing) {
3274
case XML_SUSPENDED:
3275
*nextPtr = next;
3276
return XML_ERROR_NONE;
3277
case XML_FINISHED:
3278
return XML_ERROR_ABORTED;
3279
default:;
3280
}
3281
}
3282
/* not reached */
3283
}
3284
3285
/* This function does not call free() on the allocated memory, merely
3286
* moving it to the parser's m_freeBindingList where it can be freed or
3287
* reused as appropriate.
3288
*/
3289
static void
3290
freeBindings(XML_Parser parser, BINDING *bindings) {
3291
while (bindings) {
3292
BINDING *b = bindings;
3293
3294
/* m_startNamespaceDeclHandler will have been called for this
3295
* binding in addBindings(), so call the end handler now.
3296
*/
3297
if (parser->m_endNamespaceDeclHandler)
3298
parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3299
3300
bindings = bindings->nextTagBinding;
3301
b->nextTagBinding = parser->m_freeBindingList;
3302
parser->m_freeBindingList = b;
3303
b->prefix->binding = b->prevPrefixBinding;
3304
}
3305
}
3306
3307
/* Precondition: all arguments must be non-NULL;
3308
Purpose:
3309
- normalize attributes
3310
- check attributes for well-formedness
3311
- generate namespace aware attribute names (URI, prefix)
3312
- build list of attributes for startElementHandler
3313
- default attributes
3314
- process namespace declarations (check and report them)
3315
- generate namespace aware element name (URI, prefix)
3316
*/
3317
static enum XML_Error
3318
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3319
TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3320
enum XML_Account account) {
3321
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3322
ELEMENT_TYPE *elementType;
3323
int nDefaultAtts;
3324
const XML_Char **appAtts; /* the attribute list for the application */
3325
int attIndex = 0;
3326
int prefixLen;
3327
int i;
3328
int n;
3329
XML_Char *uri;
3330
int nPrefixes = 0;
3331
BINDING *binding;
3332
const XML_Char *localPart;
3333
3334
/* lookup the element type name */
3335
elementType
3336
= (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3337
if (! elementType) {
3338
const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3339
if (! name)
3340
return XML_ERROR_NO_MEMORY;
3341
elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3342
sizeof(ELEMENT_TYPE));
3343
if (! elementType)
3344
return XML_ERROR_NO_MEMORY;
3345
if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3346
return XML_ERROR_NO_MEMORY;
3347
}
3348
nDefaultAtts = elementType->nDefaultAtts;
3349
3350
/* get the attributes from the tokenizer */
3351
n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3352
3353
/* Detect and prevent integer overflow */
3354
if (n > INT_MAX - nDefaultAtts) {
3355
return XML_ERROR_NO_MEMORY;
3356
}
3357
3358
if (n + nDefaultAtts > parser->m_attsSize) {
3359
int oldAttsSize = parser->m_attsSize;
3360
ATTRIBUTE *temp;
3361
#ifdef XML_ATTR_INFO
3362
XML_AttrInfo *temp2;
3363
#endif
3364
3365
/* Detect and prevent integer overflow */
3366
if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3367
|| (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3368
return XML_ERROR_NO_MEMORY;
3369
}
3370
3371
parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3372
3373
/* Detect and prevent integer overflow.
3374
* The preprocessor guard addresses the "always false" warning
3375
* from -Wtype-limits on platforms where
3376
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3377
#if UINT_MAX >= SIZE_MAX
3378
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3379
parser->m_attsSize = oldAttsSize;
3380
return XML_ERROR_NO_MEMORY;
3381
}
3382
#endif
3383
3384
temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3385
parser->m_attsSize * sizeof(ATTRIBUTE));
3386
if (temp == NULL) {
3387
parser->m_attsSize = oldAttsSize;
3388
return XML_ERROR_NO_MEMORY;
3389
}
3390
parser->m_atts = temp;
3391
#ifdef XML_ATTR_INFO
3392
/* Detect and prevent integer overflow.
3393
* The preprocessor guard addresses the "always false" warning
3394
* from -Wtype-limits on platforms where
3395
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3396
# if UINT_MAX >= SIZE_MAX
3397
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3398
parser->m_attsSize = oldAttsSize;
3399
return XML_ERROR_NO_MEMORY;
3400
}
3401
# endif
3402
3403
temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3404
parser->m_attsSize * sizeof(XML_AttrInfo));
3405
if (temp2 == NULL) {
3406
parser->m_attsSize = oldAttsSize;
3407
return XML_ERROR_NO_MEMORY;
3408
}
3409
parser->m_attInfo = temp2;
3410
#endif
3411
if (n > oldAttsSize)
3412
XmlGetAttributes(enc, attStr, n, parser->m_atts);
3413
}
3414
3415
appAtts = (const XML_Char **)parser->m_atts;
3416
for (i = 0; i < n; i++) {
3417
ATTRIBUTE *currAtt = &parser->m_atts[i];
3418
#ifdef XML_ATTR_INFO
3419
XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3420
#endif
3421
/* add the name and value to the attribute list */
3422
ATTRIBUTE_ID *attId
3423
= getAttributeId(parser, enc, currAtt->name,
3424
currAtt->name + XmlNameLength(enc, currAtt->name));
3425
if (! attId)
3426
return XML_ERROR_NO_MEMORY;
3427
#ifdef XML_ATTR_INFO
3428
currAttInfo->nameStart
3429
= parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3430
currAttInfo->nameEnd
3431
= currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3432
currAttInfo->valueStart = parser->m_parseEndByteIndex
3433
- (parser->m_parseEndPtr - currAtt->valuePtr);
3434
currAttInfo->valueEnd = parser->m_parseEndByteIndex
3435
- (parser->m_parseEndPtr - currAtt->valueEnd);
3436
#endif
3437
/* Detect duplicate attributes by their QNames. This does not work when
3438
namespace processing is turned on and different prefixes for the same
3439
namespace are used. For this case we have a check further down.
3440
*/
3441
if ((attId->name)[-1]) {
3442
if (enc == parser->m_encoding)
3443
parser->m_eventPtr = parser->m_atts[i].name;
3444
return XML_ERROR_DUPLICATE_ATTRIBUTE;
3445
}
3446
(attId->name)[-1] = 1;
3447
appAtts[attIndex++] = attId->name;
3448
if (! parser->m_atts[i].normalized) {
3449
enum XML_Error result;
3450
XML_Bool isCdata = XML_TRUE;
3451
3452
/* figure out whether declared as other than CDATA */
3453
if (attId->maybeTokenized) {
3454
int j;
3455
for (j = 0; j < nDefaultAtts; j++) {
3456
if (attId == elementType->defaultAtts[j].id) {
3457
isCdata = elementType->defaultAtts[j].isCdata;
3458
break;
3459
}
3460
}
3461
}
3462
3463
/* normalize the attribute value */
3464
result = storeAttributeValue(
3465
parser, enc, isCdata, parser->m_atts[i].valuePtr,
3466
parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3467
if (result)
3468
return result;
3469
appAtts[attIndex] = poolStart(&parser->m_tempPool);
3470
poolFinish(&parser->m_tempPool);
3471
} else {
3472
/* the value did not need normalizing */
3473
appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3474
parser->m_atts[i].valuePtr,
3475
parser->m_atts[i].valueEnd);
3476
if (appAtts[attIndex] == 0)
3477
return XML_ERROR_NO_MEMORY;
3478
poolFinish(&parser->m_tempPool);
3479
}
3480
/* handle prefixed attribute names */
3481
if (attId->prefix) {
3482
if (attId->xmlns) {
3483
/* deal with namespace declarations here */
3484
enum XML_Error result = addBinding(parser, attId->prefix, attId,
3485
appAtts[attIndex], bindingsPtr);
3486
if (result)
3487
return result;
3488
--attIndex;
3489
} else {
3490
/* deal with other prefixed names later */
3491
attIndex++;
3492
nPrefixes++;
3493
(attId->name)[-1] = 2;
3494
}
3495
} else
3496
attIndex++;
3497
}
3498
3499
/* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3500
parser->m_nSpecifiedAtts = attIndex;
3501
if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3502
for (i = 0; i < attIndex; i += 2)
3503
if (appAtts[i] == elementType->idAtt->name) {
3504
parser->m_idAttIndex = i;
3505
break;
3506
}
3507
} else
3508
parser->m_idAttIndex = -1;
3509
3510
/* do attribute defaulting */
3511
for (i = 0; i < nDefaultAtts; i++) {
3512
const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3513
if (! (da->id->name)[-1] && da->value) {
3514
if (da->id->prefix) {
3515
if (da->id->xmlns) {
3516
enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3517
da->value, bindingsPtr);
3518
if (result)
3519
return result;
3520
} else {
3521
(da->id->name)[-1] = 2;
3522
nPrefixes++;
3523
appAtts[attIndex++] = da->id->name;
3524
appAtts[attIndex++] = da->value;
3525
}
3526
} else {
3527
(da->id->name)[-1] = 1;
3528
appAtts[attIndex++] = da->id->name;
3529
appAtts[attIndex++] = da->value;
3530
}
3531
}
3532
}
3533
appAtts[attIndex] = 0;
3534
3535
/* expand prefixed attribute names, check for duplicates,
3536
and clear flags that say whether attributes were specified */
3537
i = 0;
3538
if (nPrefixes) {
3539
int j; /* hash table index */
3540
unsigned long version = parser->m_nsAttsVersion;
3541
3542
/* Detect and prevent invalid shift */
3543
if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3544
return XML_ERROR_NO_MEMORY;
3545
}
3546
3547
unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3548
unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3549
/* size of hash table must be at least 2 * (# of prefixed attributes) */
3550
if ((nPrefixes << 1)
3551
>> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3552
NS_ATT *temp;
3553
/* hash table size must also be a power of 2 and >= 8 */
3554
while (nPrefixes >> parser->m_nsAttsPower++)
3555
;
3556
if (parser->m_nsAttsPower < 3)
3557
parser->m_nsAttsPower = 3;
3558
3559
/* Detect and prevent invalid shift */
3560
if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3561
/* Restore actual size of memory in m_nsAtts */
3562
parser->m_nsAttsPower = oldNsAttsPower;
3563
return XML_ERROR_NO_MEMORY;
3564
}
3565
3566
nsAttsSize = 1u << parser->m_nsAttsPower;
3567
3568
/* Detect and prevent integer overflow.
3569
* The preprocessor guard addresses the "always false" warning
3570
* from -Wtype-limits on platforms where
3571
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3572
#if UINT_MAX >= SIZE_MAX
3573
if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3574
/* Restore actual size of memory in m_nsAtts */
3575
parser->m_nsAttsPower = oldNsAttsPower;
3576
return XML_ERROR_NO_MEMORY;
3577
}
3578
#endif
3579
3580
temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3581
nsAttsSize * sizeof(NS_ATT));
3582
if (! temp) {
3583
/* Restore actual size of memory in m_nsAtts */
3584
parser->m_nsAttsPower = oldNsAttsPower;
3585
return XML_ERROR_NO_MEMORY;
3586
}
3587
parser->m_nsAtts = temp;
3588
version = 0; /* force re-initialization of m_nsAtts hash table */
3589
}
3590
/* using a version flag saves us from initializing m_nsAtts every time */
3591
if (! version) { /* initialize version flags when version wraps around */
3592
version = INIT_ATTS_VERSION;
3593
for (j = nsAttsSize; j != 0;)
3594
parser->m_nsAtts[--j].version = version;
3595
}
3596
parser->m_nsAttsVersion = --version;
3597
3598
/* expand prefixed names and check for duplicates */
3599
for (; i < attIndex; i += 2) {
3600
const XML_Char *s = appAtts[i];
3601
if (s[-1] == 2) { /* prefixed */
3602
ATTRIBUTE_ID *id;
3603
const BINDING *b;
3604
unsigned long uriHash;
3605
struct siphash sip_state;
3606
struct sipkey sip_key;
3607
3608
copy_salt_to_sipkey(parser, &sip_key);
3609
sip24_init(&sip_state, &sip_key);
3610
3611
((XML_Char *)s)[-1] = 0; /* clear flag */
3612
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3613
if (! id || ! id->prefix) {
3614
/* This code is walking through the appAtts array, dealing
3615
* with (in this case) a prefixed attribute name. To be in
3616
* the array, the attribute must have already been bound, so
3617
* has to have passed through the hash table lookup once
3618
* already. That implies that an entry for it already
3619
* exists, so the lookup above will return a pointer to
3620
* already allocated memory. There is no opportunaity for
3621
* the allocator to fail, so the condition above cannot be
3622
* fulfilled.
3623
*
3624
* Since it is difficult to be certain that the above
3625
* analysis is complete, we retain the test and merely
3626
* remove the code from coverage tests.
3627
*/
3628
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3629
}
3630
b = id->prefix->binding;
3631
if (! b)
3632
return XML_ERROR_UNBOUND_PREFIX;
3633
3634
for (j = 0; j < b->uriLen; j++) {
3635
const XML_Char c = b->uri[j];
3636
if (! poolAppendChar(&parser->m_tempPool, c))
3637
return XML_ERROR_NO_MEMORY;
3638
}
3639
3640
sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3641
3642
while (*s++ != XML_T(ASCII_COLON))
3643
;
3644
3645
sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3646
3647
do { /* copies null terminator */
3648
if (! poolAppendChar(&parser->m_tempPool, *s))
3649
return XML_ERROR_NO_MEMORY;
3650
} while (*s++);
3651
3652
uriHash = (unsigned long)sip24_final(&sip_state);
3653
3654
{ /* Check hash table for duplicate of expanded name (uriName).
3655
Derived from code in lookup(parser, HASH_TABLE *table, ...).
3656
*/
3657
unsigned char step = 0;
3658
unsigned long mask = nsAttsSize - 1;
3659
j = uriHash & mask; /* index into hash table */
3660
while (parser->m_nsAtts[j].version == version) {
3661
/* for speed we compare stored hash values first */
3662
if (uriHash == parser->m_nsAtts[j].hash) {
3663
const XML_Char *s1 = poolStart(&parser->m_tempPool);
3664
const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3665
/* s1 is null terminated, but not s2 */
3666
for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3667
;
3668
if (*s1 == 0)
3669
return XML_ERROR_DUPLICATE_ATTRIBUTE;
3670
}
3671
if (! step)
3672
step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3673
j < step ? (j += nsAttsSize - step) : (j -= step);
3674
}
3675
}
3676
3677
if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3678
parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3679
s = b->prefix->name;
3680
do {
3681
if (! poolAppendChar(&parser->m_tempPool, *s))
3682
return XML_ERROR_NO_MEMORY;
3683
} while (*s++);
3684
}
3685
3686
/* store expanded name in attribute list */
3687
s = poolStart(&parser->m_tempPool);
3688
poolFinish(&parser->m_tempPool);
3689
appAtts[i] = s;
3690
3691
/* fill empty slot with new version, uriName and hash value */
3692
parser->m_nsAtts[j].version = version;
3693
parser->m_nsAtts[j].hash = uriHash;
3694
parser->m_nsAtts[j].uriName = s;
3695
3696
if (! --nPrefixes) {
3697
i += 2;
3698
break;
3699
}
3700
} else /* not prefixed */
3701
((XML_Char *)s)[-1] = 0; /* clear flag */
3702
}
3703
}
3704
/* clear flags for the remaining attributes */
3705
for (; i < attIndex; i += 2)
3706
((XML_Char *)(appAtts[i]))[-1] = 0;
3707
for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3708
binding->attId->name[-1] = 0;
3709
3710
if (! parser->m_ns)
3711
return XML_ERROR_NONE;
3712
3713
/* expand the element type name */
3714
if (elementType->prefix) {
3715
binding = elementType->prefix->binding;
3716
if (! binding)
3717
return XML_ERROR_UNBOUND_PREFIX;
3718
localPart = tagNamePtr->str;
3719
while (*localPart++ != XML_T(ASCII_COLON))
3720
;
3721
} else if (dtd->defaultPrefix.binding) {
3722
binding = dtd->defaultPrefix.binding;
3723
localPart = tagNamePtr->str;
3724
} else
3725
return XML_ERROR_NONE;
3726
prefixLen = 0;
3727
if (parser->m_ns_triplets && binding->prefix->name) {
3728
for (; binding->prefix->name[prefixLen++];)
3729
; /* prefixLen includes null terminator */
3730
}
3731
tagNamePtr->localPart = localPart;
3732
tagNamePtr->uriLen = binding->uriLen;
3733
tagNamePtr->prefix = binding->prefix->name;
3734
tagNamePtr->prefixLen = prefixLen;
3735
for (i = 0; localPart[i++];)
3736
; /* i includes null terminator */
3737
3738
/* Detect and prevent integer overflow */
3739
if (binding->uriLen > INT_MAX - prefixLen
3740
|| i > INT_MAX - (binding->uriLen + prefixLen)) {
3741
return XML_ERROR_NO_MEMORY;
3742
}
3743
3744
n = i + binding->uriLen + prefixLen;
3745
if (n > binding->uriAlloc) {
3746
TAG *p;
3747
3748
/* Detect and prevent integer overflow */
3749
if (n > INT_MAX - EXPAND_SPARE) {
3750
return XML_ERROR_NO_MEMORY;
3751
}
3752
/* Detect and prevent integer overflow.
3753
* The preprocessor guard addresses the "always false" warning
3754
* from -Wtype-limits on platforms where
3755
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3756
#if UINT_MAX >= SIZE_MAX
3757
if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3758
return XML_ERROR_NO_MEMORY;
3759
}
3760
#endif
3761
3762
uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3763
if (! uri)
3764
return XML_ERROR_NO_MEMORY;
3765
binding->uriAlloc = n + EXPAND_SPARE;
3766
memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3767
for (p = parser->m_tagStack; p; p = p->parent)
3768
if (p->name.str == binding->uri)
3769
p->name.str = uri;
3770
FREE(parser, binding->uri);
3771
binding->uri = uri;
3772
}
3773
/* if m_namespaceSeparator != '\0' then uri includes it already */
3774
uri = binding->uri + binding->uriLen;
3775
memcpy(uri, localPart, i * sizeof(XML_Char));
3776
/* we always have a namespace separator between localPart and prefix */
3777
if (prefixLen) {
3778
uri += i - 1;
3779
*uri = parser->m_namespaceSeparator; /* replace null terminator */
3780
memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3781
}
3782
tagNamePtr->str = binding->uri;
3783
return XML_ERROR_NONE;
3784
}
3785
3786
static XML_Bool
3787
is_rfc3986_uri_char(XML_Char candidate) {
3788
// For the RFC 3986 ANBF grammar see
3789
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3790
3791
switch (candidate) {
3792
// From rule "ALPHA" (uppercase half)
3793
case 'A':
3794
case 'B':
3795
case 'C':
3796
case 'D':
3797
case 'E':
3798
case 'F':
3799
case 'G':
3800
case 'H':
3801
case 'I':
3802
case 'J':
3803
case 'K':
3804
case 'L':
3805
case 'M':
3806
case 'N':
3807
case 'O':
3808
case 'P':
3809
case 'Q':
3810
case 'R':
3811
case 'S':
3812
case 'T':
3813
case 'U':
3814
case 'V':
3815
case 'W':
3816
case 'X':
3817
case 'Y':
3818
case 'Z':
3819
3820
// From rule "ALPHA" (lowercase half)
3821
case 'a':
3822
case 'b':
3823
case 'c':
3824
case 'd':
3825
case 'e':
3826
case 'f':
3827
case 'g':
3828
case 'h':
3829
case 'i':
3830
case 'j':
3831
case 'k':
3832
case 'l':
3833
case 'm':
3834
case 'n':
3835
case 'o':
3836
case 'p':
3837
case 'q':
3838
case 'r':
3839
case 's':
3840
case 't':
3841
case 'u':
3842
case 'v':
3843
case 'w':
3844
case 'x':
3845
case 'y':
3846
case 'z':
3847
3848
// From rule "DIGIT"
3849
case '0':
3850
case '1':
3851
case '2':
3852
case '3':
3853
case '4':
3854
case '5':
3855
case '6':
3856
case '7':
3857
case '8':
3858
case '9':
3859
3860
// From rule "pct-encoded"
3861
case '%':
3862
3863
// From rule "unreserved"
3864
case '-':
3865
case '.':
3866
case '_':
3867
case '~':
3868
3869
// From rule "gen-delims"
3870
case ':':
3871
case '/':
3872
case '?':
3873
case '#':
3874
case '[':
3875
case ']':
3876
case '@':
3877
3878
// From rule "sub-delims"
3879
case '!':
3880
case '$':
3881
case '&':
3882
case '\'':
3883
case '(':
3884
case ')':
3885
case '*':
3886
case '+':
3887
case ',':
3888
case ';':
3889
case '=':
3890
return XML_TRUE;
3891
3892
default:
3893
return XML_FALSE;
3894
}
3895
}
3896
3897
/* addBinding() overwrites the value of prefix->binding without checking.
3898
Therefore one must keep track of the old value outside of addBinding().
3899
*/
3900
static enum XML_Error
3901
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3902
const XML_Char *uri, BINDING **bindingsPtr) {
3903
// "http://www.w3.org/XML/1998/namespace"
3904
static const XML_Char xmlNamespace[]
3905
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3906
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3907
ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3908
ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3909
ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3910
ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3911
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3912
ASCII_e, '\0'};
3913
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3914
// "http://www.w3.org/2000/xmlns/"
3915
static const XML_Char xmlnsNamespace[]
3916
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3917
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3918
ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3919
ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3920
ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3921
static const int xmlnsLen
3922
= (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3923
3924
XML_Bool mustBeXML = XML_FALSE;
3925
XML_Bool isXML = XML_TRUE;
3926
XML_Bool isXMLNS = XML_TRUE;
3927
3928
BINDING *b;
3929
int len;
3930
3931
/* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3932
if (*uri == XML_T('\0') && prefix->name)
3933
return XML_ERROR_UNDECLARING_PREFIX;
3934
3935
if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3936
&& prefix->name[1] == XML_T(ASCII_m)
3937
&& prefix->name[2] == XML_T(ASCII_l)) {
3938
/* Not allowed to bind xmlns */
3939
if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3940
&& prefix->name[5] == XML_T('\0'))
3941
return XML_ERROR_RESERVED_PREFIX_XMLNS;
3942
3943
if (prefix->name[3] == XML_T('\0'))
3944
mustBeXML = XML_TRUE;
3945
}
3946
3947
for (len = 0; uri[len]; len++) {
3948
if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3949
isXML = XML_FALSE;
3950
3951
if (! mustBeXML && isXMLNS
3952
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3953
isXMLNS = XML_FALSE;
3954
3955
// NOTE: While Expat does not validate namespace URIs against RFC 3986
3956
// today (and is not REQUIRED to do so with regard to the XML 1.0
3957
// namespaces specification) we have to at least make sure, that
3958
// the application on top of Expat (that is likely splitting expanded
3959
// element names ("qualified names") of form
3960
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3961
// in its element handler code) cannot be confused by an attacker
3962
// putting additional namespace separator characters into namespace
3963
// declarations. That would be ambiguous and not to be expected.
3964
//
3965
// While the HTML API docs of function XML_ParserCreateNS have been
3966
// advising against use of a namespace separator character that can
3967
// appear in a URI for >20 years now, some widespread applications
3968
// are using URI characters (':' (colon) in particular) for a
3969
// namespace separator, in practice. To keep these applications
3970
// functional, we only reject namespaces URIs containing the
3971
// application-chosen namespace separator if the chosen separator
3972
// is a non-URI character with regard to RFC 3986.
3973
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3974
&& ! is_rfc3986_uri_char(uri[len])) {
3975
return XML_ERROR_SYNTAX;
3976
}
3977
}
3978
isXML = isXML && len == xmlLen;
3979
isXMLNS = isXMLNS && len == xmlnsLen;
3980
3981
if (mustBeXML != isXML)
3982
return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3983
: XML_ERROR_RESERVED_NAMESPACE_URI;
3984
3985
if (isXMLNS)
3986
return XML_ERROR_RESERVED_NAMESPACE_URI;
3987
3988
if (parser->m_namespaceSeparator)
3989
len++;
3990
if (parser->m_freeBindingList) {
3991
b = parser->m_freeBindingList;
3992
if (len > b->uriAlloc) {
3993
/* Detect and prevent integer overflow */
3994
if (len > INT_MAX - EXPAND_SPARE) {
3995
return XML_ERROR_NO_MEMORY;
3996
}
3997
3998
/* Detect and prevent integer overflow.
3999
* The preprocessor guard addresses the "always false" warning
4000
* from -Wtype-limits on platforms where
4001
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4002
#if UINT_MAX >= SIZE_MAX
4003
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4004
return XML_ERROR_NO_MEMORY;
4005
}
4006
#endif
4007
4008
XML_Char *temp = (XML_Char *)REALLOC(
4009
parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4010
if (temp == NULL)
4011
return XML_ERROR_NO_MEMORY;
4012
b->uri = temp;
4013
b->uriAlloc = len + EXPAND_SPARE;
4014
}
4015
parser->m_freeBindingList = b->nextTagBinding;
4016
} else {
4017
b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4018
if (! b)
4019
return XML_ERROR_NO_MEMORY;
4020
4021
/* Detect and prevent integer overflow */
4022
if (len > INT_MAX - EXPAND_SPARE) {
4023
return XML_ERROR_NO_MEMORY;
4024
}
4025
/* Detect and prevent integer overflow.
4026
* The preprocessor guard addresses the "always false" warning
4027
* from -Wtype-limits on platforms where
4028
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4029
#if UINT_MAX >= SIZE_MAX
4030
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4031
return XML_ERROR_NO_MEMORY;
4032
}
4033
#endif
4034
4035
b->uri
4036
= (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4037
if (! b->uri) {
4038
FREE(parser, b);
4039
return XML_ERROR_NO_MEMORY;
4040
}
4041
b->uriAlloc = len + EXPAND_SPARE;
4042
}
4043
b->uriLen = len;
4044
memcpy(b->uri, uri, len * sizeof(XML_Char));
4045
if (parser->m_namespaceSeparator)
4046
b->uri[len - 1] = parser->m_namespaceSeparator;
4047
b->prefix = prefix;
4048
b->attId = attId;
4049
b->prevPrefixBinding = prefix->binding;
4050
/* NULL binding when default namespace undeclared */
4051
if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4052
prefix->binding = NULL;
4053
else
4054
prefix->binding = b;
4055
b->nextTagBinding = *bindingsPtr;
4056
*bindingsPtr = b;
4057
/* if attId == NULL then we are not starting a namespace scope */
4058
if (attId && parser->m_startNamespaceDeclHandler)
4059
parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4060
prefix->binding ? uri : 0);
4061
return XML_ERROR_NONE;
4062
}
4063
4064
/* The idea here is to avoid using stack for each CDATA section when
4065
the whole file is parsed with one call.
4066
*/
4067
static enum XML_Error PTRCALL
4068
cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4069
const char **endPtr) {
4070
enum XML_Error result = doCdataSection(
4071
parser, parser->m_encoding, &start, end, endPtr,
4072
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4073
if (result != XML_ERROR_NONE)
4074
return result;
4075
if (start) {
4076
if (parser->m_parentParser) { /* we are parsing an external entity */
4077
parser->m_processor = externalEntityContentProcessor;
4078
return externalEntityContentProcessor(parser, start, end, endPtr);
4079
} else {
4080
parser->m_processor = contentProcessor;
4081
return contentProcessor(parser, start, end, endPtr);
4082
}
4083
}
4084
return result;
4085
}
4086
4087
/* startPtr gets set to non-null if the section is closed, and to null if
4088
the section is not yet closed.
4089
*/
4090
static enum XML_Error
4091
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4092
const char *end, const char **nextPtr, XML_Bool haveMore,
4093
enum XML_Account account) {
4094
const char *s = *startPtr;
4095
const char **eventPP;
4096
const char **eventEndPP;
4097
if (enc == parser->m_encoding) {
4098
eventPP = &parser->m_eventPtr;
4099
*eventPP = s;
4100
eventEndPP = &parser->m_eventEndPtr;
4101
} else {
4102
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4103
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4104
}
4105
*eventPP = s;
4106
*startPtr = NULL;
4107
4108
for (;;) {
4109
const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4110
int tok = XmlCdataSectionTok(enc, s, end, &next);
4111
#if XML_GE == 1
4112
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4113
accountingOnAbort(parser);
4114
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4115
}
4116
#else
4117
UNUSED_P(account);
4118
#endif
4119
*eventEndPP = next;
4120
switch (tok) {
4121
case XML_TOK_CDATA_SECT_CLOSE:
4122
if (parser->m_endCdataSectionHandler)
4123
parser->m_endCdataSectionHandler(parser->m_handlerArg);
4124
/* BEGIN disabled code */
4125
/* see comment under XML_TOK_CDATA_SECT_OPEN */
4126
else if ((0) && parser->m_characterDataHandler)
4127
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4128
0);
4129
/* END disabled code */
4130
else if (parser->m_defaultHandler)
4131
reportDefault(parser, enc, s, next);
4132
*startPtr = next;
4133
*nextPtr = next;
4134
if (parser->m_parsingStatus.parsing == XML_FINISHED)
4135
return XML_ERROR_ABORTED;
4136
else
4137
return XML_ERROR_NONE;
4138
case XML_TOK_DATA_NEWLINE:
4139
if (parser->m_characterDataHandler) {
4140
XML_Char c = 0xA;
4141
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4142
} else if (parser->m_defaultHandler)
4143
reportDefault(parser, enc, s, next);
4144
break;
4145
case XML_TOK_DATA_CHARS: {
4146
XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4147
if (charDataHandler) {
4148
if (MUST_CONVERT(enc, s)) {
4149
for (;;) {
4150
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4151
const enum XML_Convert_Result convert_res = XmlConvert(
4152
enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4153
*eventEndPP = next;
4154
charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4155
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4156
if ((convert_res == XML_CONVERT_COMPLETED)
4157
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4158
break;
4159
*eventPP = s;
4160
}
4161
} else
4162
charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4163
(int)((const XML_Char *)next - (const XML_Char *)s));
4164
} else if (parser->m_defaultHandler)
4165
reportDefault(parser, enc, s, next);
4166
} break;
4167
case XML_TOK_INVALID:
4168
*eventPP = next;
4169
return XML_ERROR_INVALID_TOKEN;
4170
case XML_TOK_PARTIAL_CHAR:
4171
if (haveMore) {
4172
*nextPtr = s;
4173
return XML_ERROR_NONE;
4174
}
4175
return XML_ERROR_PARTIAL_CHAR;
4176
case XML_TOK_PARTIAL:
4177
case XML_TOK_NONE:
4178
if (haveMore) {
4179
*nextPtr = s;
4180
return XML_ERROR_NONE;
4181
}
4182
return XML_ERROR_UNCLOSED_CDATA_SECTION;
4183
default:
4184
/* Every token returned by XmlCdataSectionTok() has its own
4185
* explicit case, so this default case will never be executed.
4186
* We retain it as a safety net and exclude it from the coverage
4187
* statistics.
4188
*
4189
* LCOV_EXCL_START
4190
*/
4191
*eventPP = next;
4192
return XML_ERROR_UNEXPECTED_STATE;
4193
/* LCOV_EXCL_STOP */
4194
}
4195
4196
*eventPP = s = next;
4197
switch (parser->m_parsingStatus.parsing) {
4198
case XML_SUSPENDED:
4199
*nextPtr = next;
4200
return XML_ERROR_NONE;
4201
case XML_FINISHED:
4202
return XML_ERROR_ABORTED;
4203
default:;
4204
}
4205
}
4206
/* not reached */
4207
}
4208
4209
#ifdef XML_DTD
4210
4211
/* The idea here is to avoid using stack for each IGNORE section when
4212
the whole file is parsed with one call.
4213
*/
4214
static enum XML_Error PTRCALL
4215
ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4216
const char **endPtr) {
4217
enum XML_Error result
4218
= doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4219
(XML_Bool)! parser->m_parsingStatus.finalBuffer);
4220
if (result != XML_ERROR_NONE)
4221
return result;
4222
if (start) {
4223
parser->m_processor = prologProcessor;
4224
return prologProcessor(parser, start, end, endPtr);
4225
}
4226
return result;
4227
}
4228
4229
/* startPtr gets set to non-null is the section is closed, and to null
4230
if the section is not yet closed.
4231
*/
4232
static enum XML_Error
4233
doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4234
const char *end, const char **nextPtr, XML_Bool haveMore) {
4235
const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4236
int tok;
4237
const char *s = *startPtr;
4238
const char **eventPP;
4239
const char **eventEndPP;
4240
if (enc == parser->m_encoding) {
4241
eventPP = &parser->m_eventPtr;
4242
*eventPP = s;
4243
eventEndPP = &parser->m_eventEndPtr;
4244
} else {
4245
/* It's not entirely clear, but it seems the following two lines
4246
* of code cannot be executed. The only occasions on which 'enc'
4247
* is not 'encoding' are when this function is called
4248
* from the internal entity processing, and IGNORE sections are an
4249
* error in internal entities.
4250
*
4251
* Since it really isn't clear that this is true, we keep the code
4252
* and just remove it from our coverage tests.
4253
*
4254
* LCOV_EXCL_START
4255
*/
4256
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4257
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4258
/* LCOV_EXCL_STOP */
4259
}
4260
*eventPP = s;
4261
*startPtr = NULL;
4262
tok = XmlIgnoreSectionTok(enc, s, end, &next);
4263
# if XML_GE == 1
4264
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4265
XML_ACCOUNT_DIRECT)) {
4266
accountingOnAbort(parser);
4267
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4268
}
4269
# endif
4270
*eventEndPP = next;
4271
switch (tok) {
4272
case XML_TOK_IGNORE_SECT:
4273
if (parser->m_defaultHandler)
4274
reportDefault(parser, enc, s, next);
4275
*startPtr = next;
4276
*nextPtr = next;
4277
if (parser->m_parsingStatus.parsing == XML_FINISHED)
4278
return XML_ERROR_ABORTED;
4279
else
4280
return XML_ERROR_NONE;
4281
case XML_TOK_INVALID:
4282
*eventPP = next;
4283
return XML_ERROR_INVALID_TOKEN;
4284
case XML_TOK_PARTIAL_CHAR:
4285
if (haveMore) {
4286
*nextPtr = s;
4287
return XML_ERROR_NONE;
4288
}
4289
return XML_ERROR_PARTIAL_CHAR;
4290
case XML_TOK_PARTIAL:
4291
case XML_TOK_NONE:
4292
if (haveMore) {
4293
*nextPtr = s;
4294
return XML_ERROR_NONE;
4295
}
4296
return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4297
default:
4298
/* All of the tokens that XmlIgnoreSectionTok() returns have
4299
* explicit cases to handle them, so this default case is never
4300
* executed. We keep it as a safety net anyway, and remove it
4301
* from our test coverage statistics.
4302
*
4303
* LCOV_EXCL_START
4304
*/
4305
*eventPP = next;
4306
return XML_ERROR_UNEXPECTED_STATE;
4307
/* LCOV_EXCL_STOP */
4308
}
4309
/* not reached */
4310
}
4311
4312
#endif /* XML_DTD */
4313
4314
static enum XML_Error
4315
initializeEncoding(XML_Parser parser) {
4316
const char *s;
4317
#ifdef XML_UNICODE
4318
char encodingBuf[128];
4319
/* See comments about `protocolEncodingName` in parserInit() */
4320
if (! parser->m_protocolEncodingName)
4321
s = NULL;
4322
else {
4323
int i;
4324
for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4325
if (i == sizeof(encodingBuf) - 1
4326
|| (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4327
encodingBuf[0] = '\0';
4328
break;
4329
}
4330
encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4331
}
4332
encodingBuf[i] = '\0';
4333
s = encodingBuf;
4334
}
4335
#else
4336
s = parser->m_protocolEncodingName;
4337
#endif
4338
if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4339
&parser->m_initEncoding, &parser->m_encoding, s))
4340
return XML_ERROR_NONE;
4341
return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4342
}
4343
4344
static enum XML_Error
4345
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4346
const char *next) {
4347
const char *encodingName = NULL;
4348
const XML_Char *storedEncName = NULL;
4349
const ENCODING *newEncoding = NULL;
4350
const char *version = NULL;
4351
const char *versionend = NULL;
4352
const XML_Char *storedversion = NULL;
4353
int standalone = -1;
4354
4355
#if XML_GE == 1
4356
if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4357
XML_ACCOUNT_DIRECT)) {
4358
accountingOnAbort(parser);
4359
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4360
}
4361
#endif
4362
4363
if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4364
isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4365
&version, &versionend, &encodingName, &newEncoding, &standalone)) {
4366
if (isGeneralTextEntity)
4367
return XML_ERROR_TEXT_DECL;
4368
else
4369
return XML_ERROR_XML_DECL;
4370
}
4371
if (! isGeneralTextEntity && standalone == 1) {
4372
parser->m_dtd->standalone = XML_TRUE;
4373
#ifdef XML_DTD
4374
if (parser->m_paramEntityParsing
4375
== XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4376
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4377
#endif /* XML_DTD */
4378
}
4379
if (parser->m_xmlDeclHandler) {
4380
if (encodingName != NULL) {
4381
storedEncName = poolStoreString(
4382
&parser->m_temp2Pool, parser->m_encoding, encodingName,
4383
encodingName + XmlNameLength(parser->m_encoding, encodingName));
4384
if (! storedEncName)
4385
return XML_ERROR_NO_MEMORY;
4386
poolFinish(&parser->m_temp2Pool);
4387
}
4388
if (version) {
4389
storedversion
4390
= poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4391
versionend - parser->m_encoding->minBytesPerChar);
4392
if (! storedversion)
4393
return XML_ERROR_NO_MEMORY;
4394
}
4395
parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4396
standalone);
4397
} else if (parser->m_defaultHandler)
4398
reportDefault(parser, parser->m_encoding, s, next);
4399
if (parser->m_protocolEncodingName == NULL) {
4400
if (newEncoding) {
4401
/* Check that the specified encoding does not conflict with what
4402
* the parser has already deduced. Do we have the same number
4403
* of bytes in the smallest representation of a character? If
4404
* this is UTF-16, is it the same endianness?
4405
*/
4406
if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4407
|| (newEncoding->minBytesPerChar == 2
4408
&& newEncoding != parser->m_encoding)) {
4409
parser->m_eventPtr = encodingName;
4410
return XML_ERROR_INCORRECT_ENCODING;
4411
}
4412
parser->m_encoding = newEncoding;
4413
} else if (encodingName) {
4414
enum XML_Error result;
4415
if (! storedEncName) {
4416
storedEncName = poolStoreString(
4417
&parser->m_temp2Pool, parser->m_encoding, encodingName,
4418
encodingName + XmlNameLength(parser->m_encoding, encodingName));
4419
if (! storedEncName)
4420
return XML_ERROR_NO_MEMORY;
4421
}
4422
result = handleUnknownEncoding(parser, storedEncName);
4423
poolClear(&parser->m_temp2Pool);
4424
if (result == XML_ERROR_UNKNOWN_ENCODING)
4425
parser->m_eventPtr = encodingName;
4426
return result;
4427
}
4428
}
4429
4430
if (storedEncName || storedversion)
4431
poolClear(&parser->m_temp2Pool);
4432
4433
return XML_ERROR_NONE;
4434
}
4435
4436
static enum XML_Error
4437
handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4438
if (parser->m_unknownEncodingHandler) {
4439
XML_Encoding info;
4440
int i;
4441
for (i = 0; i < 256; i++)
4442
info.map[i] = -1;
4443
info.convert = NULL;
4444
info.data = NULL;
4445
info.release = NULL;
4446
if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4447
encodingName, &info)) {
4448
ENCODING *enc;
4449
parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4450
if (! parser->m_unknownEncodingMem) {
4451
if (info.release)
4452
info.release(info.data);
4453
return XML_ERROR_NO_MEMORY;
4454
}
4455
enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4456
parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4457
if (enc) {
4458
parser->m_unknownEncodingData = info.data;
4459
parser->m_unknownEncodingRelease = info.release;
4460
parser->m_encoding = enc;
4461
return XML_ERROR_NONE;
4462
}
4463
}
4464
if (info.release != NULL)
4465
info.release(info.data);
4466
}
4467
return XML_ERROR_UNKNOWN_ENCODING;
4468
}
4469
4470
static enum XML_Error PTRCALL
4471
prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4472
const char **nextPtr) {
4473
enum XML_Error result = initializeEncoding(parser);
4474
if (result != XML_ERROR_NONE)
4475
return result;
4476
parser->m_processor = prologProcessor;
4477
return prologProcessor(parser, s, end, nextPtr);
4478
}
4479
4480
#ifdef XML_DTD
4481
4482
static enum XML_Error PTRCALL
4483
externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4484
const char **nextPtr) {
4485
enum XML_Error result = initializeEncoding(parser);
4486
if (result != XML_ERROR_NONE)
4487
return result;
4488
4489
/* we know now that XML_Parse(Buffer) has been called,
4490
so we consider the external parameter entity read */
4491
parser->m_dtd->paramEntityRead = XML_TRUE;
4492
4493
if (parser->m_prologState.inEntityValue) {
4494
parser->m_processor = entityValueInitProcessor;
4495
return entityValueInitProcessor(parser, s, end, nextPtr);
4496
} else {
4497
parser->m_processor = externalParEntProcessor;
4498
return externalParEntProcessor(parser, s, end, nextPtr);
4499
}
4500
}
4501
4502
static enum XML_Error PTRCALL
4503
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4504
const char **nextPtr) {
4505
int tok;
4506
const char *start = s;
4507
const char *next = start;
4508
parser->m_eventPtr = start;
4509
4510
for (;;) {
4511
tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4512
/* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4513
- storeEntityValue
4514
- processXmlDecl
4515
*/
4516
parser->m_eventEndPtr = next;
4517
if (tok <= 0) {
4518
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4519
*nextPtr = s;
4520
return XML_ERROR_NONE;
4521
}
4522
switch (tok) {
4523
case XML_TOK_INVALID:
4524
return XML_ERROR_INVALID_TOKEN;
4525
case XML_TOK_PARTIAL:
4526
return XML_ERROR_UNCLOSED_TOKEN;
4527
case XML_TOK_PARTIAL_CHAR:
4528
return XML_ERROR_PARTIAL_CHAR;
4529
case XML_TOK_NONE: /* start == end */
4530
default:
4531
break;
4532
}
4533
/* found end of entity value - can store it now */
4534
return storeEntityValue(parser, parser->m_encoding, s, end,
4535
XML_ACCOUNT_DIRECT);
4536
} else if (tok == XML_TOK_XML_DECL) {
4537
enum XML_Error result;
4538
result = processXmlDecl(parser, 0, start, next);
4539
if (result != XML_ERROR_NONE)
4540
return result;
4541
/* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4542
* that to happen, a parameter entity parsing handler must have attempted
4543
* to suspend the parser, which fails and raises an error. The parser can
4544
* be aborted, but can't be suspended.
4545
*/
4546
if (parser->m_parsingStatus.parsing == XML_FINISHED)
4547
return XML_ERROR_ABORTED;
4548
*nextPtr = next;
4549
/* stop scanning for text declaration - we found one */
4550
parser->m_processor = entityValueProcessor;
4551
return entityValueProcessor(parser, next, end, nextPtr);
4552
}
4553
/* XmlPrologTok has now set the encoding based on the BOM it found, and we
4554
must move s and nextPtr forward to consume the BOM.
4555
4556
If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4557
would leave the BOM in the buffer and return. On the next call to this
4558
function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4559
is not valid to have multiple BOMs.
4560
*/
4561
else if (tok == XML_TOK_BOM) {
4562
# if XML_GE == 1
4563
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4564
XML_ACCOUNT_DIRECT)) {
4565
accountingOnAbort(parser);
4566
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4567
}
4568
# endif
4569
4570
*nextPtr = next;
4571
s = next;
4572
}
4573
/* If we get this token, we have the start of what might be a
4574
normal tag, but not a declaration (i.e. it doesn't begin with
4575
"<!"). In a DTD context, that isn't legal.
4576
*/
4577
else if (tok == XML_TOK_INSTANCE_START) {
4578
*nextPtr = next;
4579
return XML_ERROR_SYNTAX;
4580
}
4581
start = next;
4582
parser->m_eventPtr = start;
4583
}
4584
}
4585
4586
static enum XML_Error PTRCALL
4587
externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4588
const char **nextPtr) {
4589
const char *next = s;
4590
int tok;
4591
4592
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4593
if (tok <= 0) {
4594
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4595
*nextPtr = s;
4596
return XML_ERROR_NONE;
4597
}
4598
switch (tok) {
4599
case XML_TOK_INVALID:
4600
return XML_ERROR_INVALID_TOKEN;
4601
case XML_TOK_PARTIAL:
4602
return XML_ERROR_UNCLOSED_TOKEN;
4603
case XML_TOK_PARTIAL_CHAR:
4604
return XML_ERROR_PARTIAL_CHAR;
4605
case XML_TOK_NONE: /* start == end */
4606
default:
4607
break;
4608
}
4609
}
4610
/* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4611
However, when parsing an external subset, doProlog will not accept a BOM
4612
as valid, and report a syntax error, so we have to skip the BOM, and
4613
account for the BOM bytes.
4614
*/
4615
else if (tok == XML_TOK_BOM) {
4616
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4617
XML_ACCOUNT_DIRECT)) {
4618
accountingOnAbort(parser);
4619
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4620
}
4621
4622
s = next;
4623
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4624
}
4625
4626
parser->m_processor = prologProcessor;
4627
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4628
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4629
XML_ACCOUNT_DIRECT);
4630
}
4631
4632
static enum XML_Error PTRCALL
4633
entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4634
const char **nextPtr) {
4635
const char *start = s;
4636
const char *next = s;
4637
const ENCODING *enc = parser->m_encoding;
4638
int tok;
4639
4640
for (;;) {
4641
tok = XmlPrologTok(enc, start, end, &next);
4642
/* Note: These bytes are accounted later in:
4643
- storeEntityValue
4644
*/
4645
if (tok <= 0) {
4646
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4647
*nextPtr = s;
4648
return XML_ERROR_NONE;
4649
}
4650
switch (tok) {
4651
case XML_TOK_INVALID:
4652
return XML_ERROR_INVALID_TOKEN;
4653
case XML_TOK_PARTIAL:
4654
return XML_ERROR_UNCLOSED_TOKEN;
4655
case XML_TOK_PARTIAL_CHAR:
4656
return XML_ERROR_PARTIAL_CHAR;
4657
case XML_TOK_NONE: /* start == end */
4658
default:
4659
break;
4660
}
4661
/* found end of entity value - can store it now */
4662
return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4663
}
4664
start = next;
4665
}
4666
}
4667
4668
#endif /* XML_DTD */
4669
4670
static enum XML_Error PTRCALL
4671
prologProcessor(XML_Parser parser, const char *s, const char *end,
4672
const char **nextPtr) {
4673
const char *next = s;
4674
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4675
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4676
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4677
XML_ACCOUNT_DIRECT);
4678
}
4679
4680
static enum XML_Error
4681
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4682
int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4683
XML_Bool allowClosingDoctype, enum XML_Account account) {
4684
#ifdef XML_DTD
4685
static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4686
#endif /* XML_DTD */
4687
static const XML_Char atypeCDATA[]
4688
= {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4689
static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4690
static const XML_Char atypeIDREF[]
4691
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4692
static const XML_Char atypeIDREFS[]
4693
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4694
static const XML_Char atypeENTITY[]
4695
= {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4696
static const XML_Char atypeENTITIES[]
4697
= {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4698
ASCII_I, ASCII_E, ASCII_S, '\0'};
4699
static const XML_Char atypeNMTOKEN[]
4700
= {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4701
static const XML_Char atypeNMTOKENS[]
4702
= {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4703
ASCII_E, ASCII_N, ASCII_S, '\0'};
4704
static const XML_Char notationPrefix[]
4705
= {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4706
ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4707
static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4708
static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4709
4710
#ifndef XML_DTD
4711
UNUSED_P(account);
4712
#endif
4713
4714
/* save one level of indirection */
4715
DTD *const dtd = parser->m_dtd;
4716
4717
const char **eventPP;
4718
const char **eventEndPP;
4719
enum XML_Content_Quant quant;
4720
4721
if (enc == parser->m_encoding) {
4722
eventPP = &parser->m_eventPtr;
4723
eventEndPP = &parser->m_eventEndPtr;
4724
} else {
4725
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4726
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4727
}
4728
4729
for (;;) {
4730
int role;
4731
XML_Bool handleDefault = XML_TRUE;
4732
*eventPP = s;
4733
*eventEndPP = next;
4734
if (tok <= 0) {
4735
if (haveMore && tok != XML_TOK_INVALID) {
4736
*nextPtr = s;
4737
return XML_ERROR_NONE;
4738
}
4739
switch (tok) {
4740
case XML_TOK_INVALID:
4741
*eventPP = next;
4742
return XML_ERROR_INVALID_TOKEN;
4743
case XML_TOK_PARTIAL:
4744
return XML_ERROR_UNCLOSED_TOKEN;
4745
case XML_TOK_PARTIAL_CHAR:
4746
return XML_ERROR_PARTIAL_CHAR;
4747
case -XML_TOK_PROLOG_S:
4748
tok = -tok;
4749
break;
4750
case XML_TOK_NONE:
4751
#ifdef XML_DTD
4752
/* for internal PE NOT referenced between declarations */
4753
if (enc != parser->m_encoding
4754
&& ! parser->m_openInternalEntities->betweenDecl) {
4755
*nextPtr = s;
4756
return XML_ERROR_NONE;
4757
}
4758
/* WFC: PE Between Declarations - must check that PE contains
4759
complete markup, not only for external PEs, but also for
4760
internal PEs if the reference occurs between declarations.
4761
*/
4762
if (parser->m_isParamEntity || enc != parser->m_encoding) {
4763
if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4764
== XML_ROLE_ERROR)
4765
return XML_ERROR_INCOMPLETE_PE;
4766
*nextPtr = s;
4767
return XML_ERROR_NONE;
4768
}
4769
#endif /* XML_DTD */
4770
return XML_ERROR_NO_ELEMENTS;
4771
default:
4772
tok = -tok;
4773
next = end;
4774
break;
4775
}
4776
}
4777
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4778
#if XML_GE == 1
4779
switch (role) {
4780
case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4781
case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4782
# ifdef XML_DTD
4783
case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4784
# endif
4785
break;
4786
default:
4787
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4788
accountingOnAbort(parser);
4789
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4790
}
4791
}
4792
#endif
4793
switch (role) {
4794
case XML_ROLE_XML_DECL: {
4795
enum XML_Error result = processXmlDecl(parser, 0, s, next);
4796
if (result != XML_ERROR_NONE)
4797
return result;
4798
enc = parser->m_encoding;
4799
handleDefault = XML_FALSE;
4800
} break;
4801
case XML_ROLE_DOCTYPE_NAME:
4802
if (parser->m_startDoctypeDeclHandler) {
4803
parser->m_doctypeName
4804
= poolStoreString(&parser->m_tempPool, enc, s, next);
4805
if (! parser->m_doctypeName)
4806
return XML_ERROR_NO_MEMORY;
4807
poolFinish(&parser->m_tempPool);
4808
parser->m_doctypePubid = NULL;
4809
handleDefault = XML_FALSE;
4810
}
4811
parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4812
break;
4813
case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4814
if (parser->m_startDoctypeDeclHandler) {
4815
parser->m_startDoctypeDeclHandler(
4816
parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4817
parser->m_doctypePubid, 1);
4818
parser->m_doctypeName = NULL;
4819
poolClear(&parser->m_tempPool);
4820
handleDefault = XML_FALSE;
4821
}
4822
break;
4823
#ifdef XML_DTD
4824
case XML_ROLE_TEXT_DECL: {
4825
enum XML_Error result = processXmlDecl(parser, 1, s, next);
4826
if (result != XML_ERROR_NONE)
4827
return result;
4828
enc = parser->m_encoding;
4829
handleDefault = XML_FALSE;
4830
} break;
4831
#endif /* XML_DTD */
4832
case XML_ROLE_DOCTYPE_PUBLIC_ID:
4833
#ifdef XML_DTD
4834
parser->m_useForeignDTD = XML_FALSE;
4835
parser->m_declEntity = (ENTITY *)lookup(
4836
parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4837
if (! parser->m_declEntity)
4838
return XML_ERROR_NO_MEMORY;
4839
#endif /* XML_DTD */
4840
dtd->hasParamEntityRefs = XML_TRUE;
4841
if (parser->m_startDoctypeDeclHandler) {
4842
XML_Char *pubId;
4843
if (! XmlIsPublicId(enc, s, next, eventPP))
4844
return XML_ERROR_PUBLICID;
4845
pubId = poolStoreString(&parser->m_tempPool, enc,
4846
s + enc->minBytesPerChar,
4847
next - enc->minBytesPerChar);
4848
if (! pubId)
4849
return XML_ERROR_NO_MEMORY;
4850
normalizePublicId(pubId);
4851
poolFinish(&parser->m_tempPool);
4852
parser->m_doctypePubid = pubId;
4853
handleDefault = XML_FALSE;
4854
goto alreadyChecked;
4855
}
4856
/* fall through */
4857
case XML_ROLE_ENTITY_PUBLIC_ID:
4858
if (! XmlIsPublicId(enc, s, next, eventPP))
4859
return XML_ERROR_PUBLICID;
4860
alreadyChecked:
4861
if (dtd->keepProcessing && parser->m_declEntity) {
4862
XML_Char *tem
4863
= poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4864
next - enc->minBytesPerChar);
4865
if (! tem)
4866
return XML_ERROR_NO_MEMORY;
4867
normalizePublicId(tem);
4868
parser->m_declEntity->publicId = tem;
4869
poolFinish(&dtd->pool);
4870
/* Don't suppress the default handler if we fell through from
4871
* the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4872
*/
4873
if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4874
handleDefault = XML_FALSE;
4875
}
4876
break;
4877
case XML_ROLE_DOCTYPE_CLOSE:
4878
if (allowClosingDoctype != XML_TRUE) {
4879
/* Must not close doctype from within expanded parameter entities */
4880
return XML_ERROR_INVALID_TOKEN;
4881
}
4882
4883
if (parser->m_doctypeName) {
4884
parser->m_startDoctypeDeclHandler(
4885
parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4886
parser->m_doctypePubid, 0);
4887
poolClear(&parser->m_tempPool);
4888
handleDefault = XML_FALSE;
4889
}
4890
/* parser->m_doctypeSysid will be non-NULL in the case of a previous
4891
XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4892
was not set, indicating an external subset
4893
*/
4894
#ifdef XML_DTD
4895
if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4896
XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4897
dtd->hasParamEntityRefs = XML_TRUE;
4898
if (parser->m_paramEntityParsing
4899
&& parser->m_externalEntityRefHandler) {
4900
ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4901
externalSubsetName, sizeof(ENTITY));
4902
if (! entity) {
4903
/* The external subset name "#" will have already been
4904
* inserted into the hash table at the start of the
4905
* external entity parsing, so no allocation will happen
4906
* and lookup() cannot fail.
4907
*/
4908
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4909
}
4910
if (parser->m_useForeignDTD)
4911
entity->base = parser->m_curBase;
4912
dtd->paramEntityRead = XML_FALSE;
4913
if (! parser->m_externalEntityRefHandler(
4914
parser->m_externalEntityRefHandlerArg, 0, entity->base,
4915
entity->systemId, entity->publicId))
4916
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4917
if (dtd->paramEntityRead) {
4918
if (! dtd->standalone && parser->m_notStandaloneHandler
4919
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4920
return XML_ERROR_NOT_STANDALONE;
4921
}
4922
/* if we didn't read the foreign DTD then this means that there
4923
is no external subset and we must reset dtd->hasParamEntityRefs
4924
*/
4925
else if (! parser->m_doctypeSysid)
4926
dtd->hasParamEntityRefs = hadParamEntityRefs;
4927
/* end of DTD - no need to update dtd->keepProcessing */
4928
}
4929
parser->m_useForeignDTD = XML_FALSE;
4930
}
4931
#endif /* XML_DTD */
4932
if (parser->m_endDoctypeDeclHandler) {
4933
parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4934
handleDefault = XML_FALSE;
4935
}
4936
break;
4937
case XML_ROLE_INSTANCE_START:
4938
#ifdef XML_DTD
4939
/* if there is no DOCTYPE declaration then now is the
4940
last chance to read the foreign DTD
4941
*/
4942
if (parser->m_useForeignDTD) {
4943
XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4944
dtd->hasParamEntityRefs = XML_TRUE;
4945
if (parser->m_paramEntityParsing
4946
&& parser->m_externalEntityRefHandler) {
4947
ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4948
externalSubsetName, sizeof(ENTITY));
4949
if (! entity)
4950
return XML_ERROR_NO_MEMORY;
4951
entity->base = parser->m_curBase;
4952
dtd->paramEntityRead = XML_FALSE;
4953
if (! parser->m_externalEntityRefHandler(
4954
parser->m_externalEntityRefHandlerArg, 0, entity->base,
4955
entity->systemId, entity->publicId))
4956
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4957
if (dtd->paramEntityRead) {
4958
if (! dtd->standalone && parser->m_notStandaloneHandler
4959
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4960
return XML_ERROR_NOT_STANDALONE;
4961
}
4962
/* if we didn't read the foreign DTD then this means that there
4963
is no external subset and we must reset dtd->hasParamEntityRefs
4964
*/
4965
else
4966
dtd->hasParamEntityRefs = hadParamEntityRefs;
4967
/* end of DTD - no need to update dtd->keepProcessing */
4968
}
4969
}
4970
#endif /* XML_DTD */
4971
parser->m_processor = contentProcessor;
4972
return contentProcessor(parser, s, end, nextPtr);
4973
case XML_ROLE_ATTLIST_ELEMENT_NAME:
4974
parser->m_declElementType = getElementType(parser, enc, s, next);
4975
if (! parser->m_declElementType)
4976
return XML_ERROR_NO_MEMORY;
4977
goto checkAttListDeclHandler;
4978
case XML_ROLE_ATTRIBUTE_NAME:
4979
parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4980
if (! parser->m_declAttributeId)
4981
return XML_ERROR_NO_MEMORY;
4982
parser->m_declAttributeIsCdata = XML_FALSE;
4983
parser->m_declAttributeType = NULL;
4984
parser->m_declAttributeIsId = XML_FALSE;
4985
goto checkAttListDeclHandler;
4986
case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4987
parser->m_declAttributeIsCdata = XML_TRUE;
4988
parser->m_declAttributeType = atypeCDATA;
4989
goto checkAttListDeclHandler;
4990
case XML_ROLE_ATTRIBUTE_TYPE_ID:
4991
parser->m_declAttributeIsId = XML_TRUE;
4992
parser->m_declAttributeType = atypeID;
4993
goto checkAttListDeclHandler;
4994
case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4995
parser->m_declAttributeType = atypeIDREF;
4996
goto checkAttListDeclHandler;
4997
case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4998
parser->m_declAttributeType = atypeIDREFS;
4999
goto checkAttListDeclHandler;
5000
case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5001
parser->m_declAttributeType = atypeENTITY;
5002
goto checkAttListDeclHandler;
5003
case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5004
parser->m_declAttributeType = atypeENTITIES;
5005
goto checkAttListDeclHandler;
5006
case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5007
parser->m_declAttributeType = atypeNMTOKEN;
5008
goto checkAttListDeclHandler;
5009
case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5010
parser->m_declAttributeType = atypeNMTOKENS;
5011
checkAttListDeclHandler:
5012
if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5013
handleDefault = XML_FALSE;
5014
break;
5015
case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5016
case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5017
if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5018
const XML_Char *prefix;
5019
if (parser->m_declAttributeType) {
5020
prefix = enumValueSep;
5021
} else {
5022
prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5023
: enumValueStart);
5024
}
5025
if (! poolAppendString(&parser->m_tempPool, prefix))
5026
return XML_ERROR_NO_MEMORY;
5027
if (! poolAppend(&parser->m_tempPool, enc, s, next))
5028
return XML_ERROR_NO_MEMORY;
5029
parser->m_declAttributeType = parser->m_tempPool.start;
5030
handleDefault = XML_FALSE;
5031
}
5032
break;
5033
case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5034
case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5035
if (dtd->keepProcessing) {
5036
if (! defineAttribute(parser->m_declElementType,
5037
parser->m_declAttributeId,
5038
parser->m_declAttributeIsCdata,
5039
parser->m_declAttributeIsId, 0, parser))
5040
return XML_ERROR_NO_MEMORY;
5041
if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5042
if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5043
|| (*parser->m_declAttributeType == XML_T(ASCII_N)
5044
&& parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5045
/* Enumerated or Notation type */
5046
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5047
|| ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5048
return XML_ERROR_NO_MEMORY;
5049
parser->m_declAttributeType = parser->m_tempPool.start;
5050
poolFinish(&parser->m_tempPool);
5051
}
5052
*eventEndPP = s;
5053
parser->m_attlistDeclHandler(
5054
parser->m_handlerArg, parser->m_declElementType->name,
5055
parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5056
role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5057
handleDefault = XML_FALSE;
5058
}
5059
}
5060
poolClear(&parser->m_tempPool);
5061
break;
5062
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5063
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5064
if (dtd->keepProcessing) {
5065
const XML_Char *attVal;
5066
enum XML_Error result = storeAttributeValue(
5067
parser, enc, parser->m_declAttributeIsCdata,
5068
s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5069
XML_ACCOUNT_NONE);
5070
if (result)
5071
return result;
5072
attVal = poolStart(&dtd->pool);
5073
poolFinish(&dtd->pool);
5074
/* ID attributes aren't allowed to have a default */
5075
if (! defineAttribute(
5076
parser->m_declElementType, parser->m_declAttributeId,
5077
parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5078
return XML_ERROR_NO_MEMORY;
5079
if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5080
if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5081
|| (*parser->m_declAttributeType == XML_T(ASCII_N)
5082
&& parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5083
/* Enumerated or Notation type */
5084
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5085
|| ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5086
return XML_ERROR_NO_MEMORY;
5087
parser->m_declAttributeType = parser->m_tempPool.start;
5088
poolFinish(&parser->m_tempPool);
5089
}
5090
*eventEndPP = s;
5091
parser->m_attlistDeclHandler(
5092
parser->m_handlerArg, parser->m_declElementType->name,
5093
parser->m_declAttributeId->name, parser->m_declAttributeType,
5094
attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5095
poolClear(&parser->m_tempPool);
5096
handleDefault = XML_FALSE;
5097
}
5098
}
5099
break;
5100
case XML_ROLE_ENTITY_VALUE:
5101
if (dtd->keepProcessing) {
5102
#if XML_GE == 1
5103
// This will store the given replacement text in
5104
// parser->m_declEntity->textPtr.
5105
enum XML_Error result
5106
= storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5107
next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5108
if (parser->m_declEntity) {
5109
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5110
parser->m_declEntity->textLen
5111
= (int)(poolLength(&dtd->entityValuePool));
5112
poolFinish(&dtd->entityValuePool);
5113
if (parser->m_entityDeclHandler) {
5114
*eventEndPP = s;
5115
parser->m_entityDeclHandler(
5116
parser->m_handlerArg, parser->m_declEntity->name,
5117
parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5118
parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5119
handleDefault = XML_FALSE;
5120
}
5121
} else
5122
poolDiscard(&dtd->entityValuePool);
5123
if (result != XML_ERROR_NONE)
5124
return result;
5125
#else
5126
// This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5127
// to end up as "&entity123;" in the handler.
5128
if (parser->m_declEntity != NULL) {
5129
const enum XML_Error result
5130
= storeSelfEntityValue(parser, parser->m_declEntity);
5131
if (result != XML_ERROR_NONE)
5132
return result;
5133
5134
if (parser->m_entityDeclHandler) {
5135
*eventEndPP = s;
5136
parser->m_entityDeclHandler(
5137
parser->m_handlerArg, parser->m_declEntity->name,
5138
parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5139
parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5140
handleDefault = XML_FALSE;
5141
}
5142
}
5143
#endif
5144
}
5145
break;
5146
case XML_ROLE_DOCTYPE_SYSTEM_ID:
5147
#ifdef XML_DTD
5148
parser->m_useForeignDTD = XML_FALSE;
5149
#endif /* XML_DTD */
5150
dtd->hasParamEntityRefs = XML_TRUE;
5151
if (parser->m_startDoctypeDeclHandler) {
5152
parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5153
s + enc->minBytesPerChar,
5154
next - enc->minBytesPerChar);
5155
if (parser->m_doctypeSysid == NULL)
5156
return XML_ERROR_NO_MEMORY;
5157
poolFinish(&parser->m_tempPool);
5158
handleDefault = XML_FALSE;
5159
}
5160
#ifdef XML_DTD
5161
else
5162
/* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5163
for the case where no parser->m_startDoctypeDeclHandler is set */
5164
parser->m_doctypeSysid = externalSubsetName;
5165
#endif /* XML_DTD */
5166
if (! dtd->standalone
5167
#ifdef XML_DTD
5168
&& ! parser->m_paramEntityParsing
5169
#endif /* XML_DTD */
5170
&& parser->m_notStandaloneHandler
5171
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5172
return XML_ERROR_NOT_STANDALONE;
5173
#ifndef XML_DTD
5174
break;
5175
#else /* XML_DTD */
5176
if (! parser->m_declEntity) {
5177
parser->m_declEntity = (ENTITY *)lookup(
5178
parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5179
if (! parser->m_declEntity)
5180
return XML_ERROR_NO_MEMORY;
5181
parser->m_declEntity->publicId = NULL;
5182
}
5183
#endif /* XML_DTD */
5184
/* fall through */
5185
case XML_ROLE_ENTITY_SYSTEM_ID:
5186
if (dtd->keepProcessing && parser->m_declEntity) {
5187
parser->m_declEntity->systemId
5188
= poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5189
next - enc->minBytesPerChar);
5190
if (! parser->m_declEntity->systemId)
5191
return XML_ERROR_NO_MEMORY;
5192
parser->m_declEntity->base = parser->m_curBase;
5193
poolFinish(&dtd->pool);
5194
/* Don't suppress the default handler if we fell through from
5195
* the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5196
*/
5197
if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5198
handleDefault = XML_FALSE;
5199
}
5200
break;
5201
case XML_ROLE_ENTITY_COMPLETE:
5202
#if XML_GE == 0
5203
// This will store "&amp;entity123;" in entity->textPtr
5204
// to end up as "&entity123;" in the handler.
5205
if (parser->m_declEntity != NULL) {
5206
const enum XML_Error result
5207
= storeSelfEntityValue(parser, parser->m_declEntity);
5208
if (result != XML_ERROR_NONE)
5209
return result;
5210
}
5211
#endif
5212
if (dtd->keepProcessing && parser->m_declEntity
5213
&& parser->m_entityDeclHandler) {
5214
*eventEndPP = s;
5215
parser->m_entityDeclHandler(
5216
parser->m_handlerArg, parser->m_declEntity->name,
5217
parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5218
parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5219
handleDefault = XML_FALSE;
5220
}
5221
break;
5222
case XML_ROLE_ENTITY_NOTATION_NAME:
5223
if (dtd->keepProcessing && parser->m_declEntity) {
5224
parser->m_declEntity->notation
5225
= poolStoreString(&dtd->pool, enc, s, next);
5226
if (! parser->m_declEntity->notation)
5227
return XML_ERROR_NO_MEMORY;
5228
poolFinish(&dtd->pool);
5229
if (parser->m_unparsedEntityDeclHandler) {
5230
*eventEndPP = s;
5231
parser->m_unparsedEntityDeclHandler(
5232
parser->m_handlerArg, parser->m_declEntity->name,
5233
parser->m_declEntity->base, parser->m_declEntity->systemId,
5234
parser->m_declEntity->publicId, parser->m_declEntity->notation);
5235
handleDefault = XML_FALSE;
5236
} else if (parser->m_entityDeclHandler) {
5237
*eventEndPP = s;
5238
parser->m_entityDeclHandler(
5239
parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5240
parser->m_declEntity->base, parser->m_declEntity->systemId,
5241
parser->m_declEntity->publicId, parser->m_declEntity->notation);
5242
handleDefault = XML_FALSE;
5243
}
5244
}
5245
break;
5246
case XML_ROLE_GENERAL_ENTITY_NAME: {
5247
if (XmlPredefinedEntityName(enc, s, next)) {
5248
parser->m_declEntity = NULL;
5249
break;
5250
}
5251
if (dtd->keepProcessing) {
5252
const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5253
if (! name)
5254
return XML_ERROR_NO_MEMORY;
5255
parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5256
name, sizeof(ENTITY));
5257
if (! parser->m_declEntity)
5258
return XML_ERROR_NO_MEMORY;
5259
if (parser->m_declEntity->name != name) {
5260
poolDiscard(&dtd->pool);
5261
parser->m_declEntity = NULL;
5262
} else {
5263
poolFinish(&dtd->pool);
5264
parser->m_declEntity->publicId = NULL;
5265
parser->m_declEntity->is_param = XML_FALSE;
5266
/* if we have a parent parser or are reading an internal parameter
5267
entity, then the entity declaration is not considered "internal"
5268
*/
5269
parser->m_declEntity->is_internal
5270
= ! (parser->m_parentParser || parser->m_openInternalEntities);
5271
if (parser->m_entityDeclHandler)
5272
handleDefault = XML_FALSE;
5273
}
5274
} else {
5275
poolDiscard(&dtd->pool);
5276
parser->m_declEntity = NULL;
5277
}
5278
} break;
5279
case XML_ROLE_PARAM_ENTITY_NAME:
5280
#ifdef XML_DTD
5281
if (dtd->keepProcessing) {
5282
const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5283
if (! name)
5284
return XML_ERROR_NO_MEMORY;
5285
parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5286
name, sizeof(ENTITY));
5287
if (! parser->m_declEntity)
5288
return XML_ERROR_NO_MEMORY;
5289
if (parser->m_declEntity->name != name) {
5290
poolDiscard(&dtd->pool);
5291
parser->m_declEntity = NULL;
5292
} else {
5293
poolFinish(&dtd->pool);
5294
parser->m_declEntity->publicId = NULL;
5295
parser->m_declEntity->is_param = XML_TRUE;
5296
/* if we have a parent parser or are reading an internal parameter
5297
entity, then the entity declaration is not considered "internal"
5298
*/
5299
parser->m_declEntity->is_internal
5300
= ! (parser->m_parentParser || parser->m_openInternalEntities);
5301
if (parser->m_entityDeclHandler)
5302
handleDefault = XML_FALSE;
5303
}
5304
} else {
5305
poolDiscard(&dtd->pool);
5306
parser->m_declEntity = NULL;
5307
}
5308
#else /* not XML_DTD */
5309
parser->m_declEntity = NULL;
5310
#endif /* XML_DTD */
5311
break;
5312
case XML_ROLE_NOTATION_NAME:
5313
parser->m_declNotationPublicId = NULL;
5314
parser->m_declNotationName = NULL;
5315
if (parser->m_notationDeclHandler) {
5316
parser->m_declNotationName
5317
= poolStoreString(&parser->m_tempPool, enc, s, next);
5318
if (! parser->m_declNotationName)
5319
return XML_ERROR_NO_MEMORY;
5320
poolFinish(&parser->m_tempPool);
5321
handleDefault = XML_FALSE;
5322
}
5323
break;
5324
case XML_ROLE_NOTATION_PUBLIC_ID:
5325
if (! XmlIsPublicId(enc, s, next, eventPP))
5326
return XML_ERROR_PUBLICID;
5327
if (parser
5328
->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5329
XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5330
s + enc->minBytesPerChar,
5331
next - enc->minBytesPerChar);
5332
if (! tem)
5333
return XML_ERROR_NO_MEMORY;
5334
normalizePublicId(tem);
5335
parser->m_declNotationPublicId = tem;
5336
poolFinish(&parser->m_tempPool);
5337
handleDefault = XML_FALSE;
5338
}
5339
break;
5340
case XML_ROLE_NOTATION_SYSTEM_ID:
5341
if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5342
const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5343
s + enc->minBytesPerChar,
5344
next - enc->minBytesPerChar);
5345
if (! systemId)
5346
return XML_ERROR_NO_MEMORY;
5347
*eventEndPP = s;
5348
parser->m_notationDeclHandler(
5349
parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5350
systemId, parser->m_declNotationPublicId);
5351
handleDefault = XML_FALSE;
5352
}
5353
poolClear(&parser->m_tempPool);
5354
break;
5355
case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5356
if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5357
*eventEndPP = s;
5358
parser->m_notationDeclHandler(
5359
parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5360
0, parser->m_declNotationPublicId);
5361
handleDefault = XML_FALSE;
5362
}
5363
poolClear(&parser->m_tempPool);
5364
break;
5365
case XML_ROLE_ERROR:
5366
switch (tok) {
5367
case XML_TOK_PARAM_ENTITY_REF:
5368
/* PE references in internal subset are
5369
not allowed within declarations. */
5370
return XML_ERROR_PARAM_ENTITY_REF;
5371
case XML_TOK_XML_DECL:
5372
return XML_ERROR_MISPLACED_XML_PI;
5373
default:
5374
return XML_ERROR_SYNTAX;
5375
}
5376
#ifdef XML_DTD
5377
case XML_ROLE_IGNORE_SECT: {
5378
enum XML_Error result;
5379
if (parser->m_defaultHandler)
5380
reportDefault(parser, enc, s, next);
5381
handleDefault = XML_FALSE;
5382
result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5383
if (result != XML_ERROR_NONE)
5384
return result;
5385
else if (! next) {
5386
parser->m_processor = ignoreSectionProcessor;
5387
return result;
5388
}
5389
} break;
5390
#endif /* XML_DTD */
5391
case XML_ROLE_GROUP_OPEN:
5392
if (parser->m_prologState.level >= parser->m_groupSize) {
5393
if (parser->m_groupSize) {
5394
{
5395
/* Detect and prevent integer overflow */
5396
if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5397
return XML_ERROR_NO_MEMORY;
5398
}
5399
5400
char *const new_connector = (char *)REALLOC(
5401
parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5402
if (new_connector == NULL) {
5403
parser->m_groupSize /= 2;
5404
return XML_ERROR_NO_MEMORY;
5405
}
5406
parser->m_groupConnector = new_connector;
5407
}
5408
5409
if (dtd->scaffIndex) {
5410
/* Detect and prevent integer overflow.
5411
* The preprocessor guard addresses the "always false" warning
5412
* from -Wtype-limits on platforms where
5413
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5414
#if UINT_MAX >= SIZE_MAX
5415
if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5416
return XML_ERROR_NO_MEMORY;
5417
}
5418
#endif
5419
5420
int *const new_scaff_index = (int *)REALLOC(
5421
parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5422
if (new_scaff_index == NULL)
5423
return XML_ERROR_NO_MEMORY;
5424
dtd->scaffIndex = new_scaff_index;
5425
}
5426
} else {
5427
parser->m_groupConnector
5428
= (char *)MALLOC(parser, parser->m_groupSize = 32);
5429
if (! parser->m_groupConnector) {
5430
parser->m_groupSize = 0;
5431
return XML_ERROR_NO_MEMORY;
5432
}
5433
}
5434
}
5435
parser->m_groupConnector[parser->m_prologState.level] = 0;
5436
if (dtd->in_eldecl) {
5437
int myindex = nextScaffoldPart(parser);
5438
if (myindex < 0)
5439
return XML_ERROR_NO_MEMORY;
5440
assert(dtd->scaffIndex != NULL);
5441
dtd->scaffIndex[dtd->scaffLevel] = myindex;
5442
dtd->scaffLevel++;
5443
dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5444
if (parser->m_elementDeclHandler)
5445
handleDefault = XML_FALSE;
5446
}
5447
break;
5448
case XML_ROLE_GROUP_SEQUENCE:
5449
if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5450
return XML_ERROR_SYNTAX;
5451
parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5452
if (dtd->in_eldecl && parser->m_elementDeclHandler)
5453
handleDefault = XML_FALSE;
5454
break;
5455
case XML_ROLE_GROUP_CHOICE:
5456
if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5457
return XML_ERROR_SYNTAX;
5458
if (dtd->in_eldecl
5459
&& ! parser->m_groupConnector[parser->m_prologState.level]
5460
&& (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5461
!= XML_CTYPE_MIXED)) {
5462
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5463
= XML_CTYPE_CHOICE;
5464
if (parser->m_elementDeclHandler)
5465
handleDefault = XML_FALSE;
5466
}
5467
parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5468
break;
5469
case XML_ROLE_PARAM_ENTITY_REF:
5470
#ifdef XML_DTD
5471
case XML_ROLE_INNER_PARAM_ENTITY_REF:
5472
dtd->hasParamEntityRefs = XML_TRUE;
5473
if (! parser->m_paramEntityParsing)
5474
dtd->keepProcessing = dtd->standalone;
5475
else {
5476
const XML_Char *name;
5477
ENTITY *entity;
5478
name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5479
next - enc->minBytesPerChar);
5480
if (! name)
5481
return XML_ERROR_NO_MEMORY;
5482
entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5483
poolDiscard(&dtd->pool);
5484
/* first, determine if a check for an existing declaration is needed;
5485
if yes, check that the entity exists, and that it is internal,
5486
otherwise call the skipped entity handler
5487
*/
5488
if (parser->m_prologState.documentEntity
5489
&& (dtd->standalone ? ! parser->m_openInternalEntities
5490
: ! dtd->hasParamEntityRefs)) {
5491
if (! entity)
5492
return XML_ERROR_UNDEFINED_ENTITY;
5493
else if (! entity->is_internal) {
5494
/* It's hard to exhaustively search the code to be sure,
5495
* but there doesn't seem to be a way of executing the
5496
* following line. There are two cases:
5497
*
5498
* If 'standalone' is false, the DTD must have no
5499
* parameter entities or we wouldn't have passed the outer
5500
* 'if' statement. That means the only entity in the hash
5501
* table is the external subset name "#" which cannot be
5502
* given as a parameter entity name in XML syntax, so the
5503
* lookup must have returned NULL and we don't even reach
5504
* the test for an internal entity.
5505
*
5506
* If 'standalone' is true, it does not seem to be
5507
* possible to create entities taking this code path that
5508
* are not internal entities, so fail the test above.
5509
*
5510
* Because this analysis is very uncertain, the code is
5511
* being left in place and merely removed from the
5512
* coverage test statistics.
5513
*/
5514
return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5515
}
5516
} else if (! entity) {
5517
dtd->keepProcessing = dtd->standalone;
5518
/* cannot report skipped entities in declarations */
5519
if ((role == XML_ROLE_PARAM_ENTITY_REF)
5520
&& parser->m_skippedEntityHandler) {
5521
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5522
handleDefault = XML_FALSE;
5523
}
5524
break;
5525
}
5526
if (entity->open)
5527
return XML_ERROR_RECURSIVE_ENTITY_REF;
5528
if (entity->textPtr) {
5529
enum XML_Error result;
5530
XML_Bool betweenDecl
5531
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5532
result = processInternalEntity(parser, entity, betweenDecl);
5533
if (result != XML_ERROR_NONE)
5534
return result;
5535
handleDefault = XML_FALSE;
5536
break;
5537
}
5538
if (parser->m_externalEntityRefHandler) {
5539
dtd->paramEntityRead = XML_FALSE;
5540
entity->open = XML_TRUE;
5541
entityTrackingOnOpen(parser, entity, __LINE__);
5542
if (! parser->m_externalEntityRefHandler(
5543
parser->m_externalEntityRefHandlerArg, 0, entity->base,
5544
entity->systemId, entity->publicId)) {
5545
entityTrackingOnClose(parser, entity, __LINE__);
5546
entity->open = XML_FALSE;
5547
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5548
}
5549
entityTrackingOnClose(parser, entity, __LINE__);
5550
entity->open = XML_FALSE;
5551
handleDefault = XML_FALSE;
5552
if (! dtd->paramEntityRead) {
5553
dtd->keepProcessing = dtd->standalone;
5554
break;
5555
}
5556
} else {
5557
dtd->keepProcessing = dtd->standalone;
5558
break;
5559
}
5560
}
5561
#endif /* XML_DTD */
5562
if (! dtd->standalone && parser->m_notStandaloneHandler
5563
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5564
return XML_ERROR_NOT_STANDALONE;
5565
break;
5566
5567
/* Element declaration stuff */
5568
5569
case XML_ROLE_ELEMENT_NAME:
5570
if (parser->m_elementDeclHandler) {
5571
parser->m_declElementType = getElementType(parser, enc, s, next);
5572
if (! parser->m_declElementType)
5573
return XML_ERROR_NO_MEMORY;
5574
dtd->scaffLevel = 0;
5575
dtd->scaffCount = 0;
5576
dtd->in_eldecl = XML_TRUE;
5577
handleDefault = XML_FALSE;
5578
}
5579
break;
5580
5581
case XML_ROLE_CONTENT_ANY:
5582
case XML_ROLE_CONTENT_EMPTY:
5583
if (dtd->in_eldecl) {
5584
if (parser->m_elementDeclHandler) {
5585
XML_Content *content
5586
= (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5587
if (! content)
5588
return XML_ERROR_NO_MEMORY;
5589
content->quant = XML_CQUANT_NONE;
5590
content->name = NULL;
5591
content->numchildren = 0;
5592
content->children = NULL;
5593
content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5594
: XML_CTYPE_EMPTY);
5595
*eventEndPP = s;
5596
parser->m_elementDeclHandler(
5597
parser->m_handlerArg, parser->m_declElementType->name, content);
5598
handleDefault = XML_FALSE;
5599
}
5600
dtd->in_eldecl = XML_FALSE;
5601
}
5602
break;
5603
5604
case XML_ROLE_CONTENT_PCDATA:
5605
if (dtd->in_eldecl) {
5606
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5607
= XML_CTYPE_MIXED;
5608
if (parser->m_elementDeclHandler)
5609
handleDefault = XML_FALSE;
5610
}
5611
break;
5612
5613
case XML_ROLE_CONTENT_ELEMENT:
5614
quant = XML_CQUANT_NONE;
5615
goto elementContent;
5616
case XML_ROLE_CONTENT_ELEMENT_OPT:
5617
quant = XML_CQUANT_OPT;
5618
goto elementContent;
5619
case XML_ROLE_CONTENT_ELEMENT_REP:
5620
quant = XML_CQUANT_REP;
5621
goto elementContent;
5622
case XML_ROLE_CONTENT_ELEMENT_PLUS:
5623
quant = XML_CQUANT_PLUS;
5624
elementContent:
5625
if (dtd->in_eldecl) {
5626
ELEMENT_TYPE *el;
5627
const XML_Char *name;
5628
size_t nameLen;
5629
const char *nxt
5630
= (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5631
int myindex = nextScaffoldPart(parser);
5632
if (myindex < 0)
5633
return XML_ERROR_NO_MEMORY;
5634
dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5635
dtd->scaffold[myindex].quant = quant;
5636
el = getElementType(parser, enc, s, nxt);
5637
if (! el)
5638
return XML_ERROR_NO_MEMORY;
5639
name = el->name;
5640
dtd->scaffold[myindex].name = name;
5641
nameLen = 0;
5642
for (; name[nameLen++];)
5643
;
5644
5645
/* Detect and prevent integer overflow */
5646
if (nameLen > UINT_MAX - dtd->contentStringLen) {
5647
return XML_ERROR_NO_MEMORY;
5648
}
5649
5650
dtd->contentStringLen += (unsigned)nameLen;
5651
if (parser->m_elementDeclHandler)
5652
handleDefault = XML_FALSE;
5653
}
5654
break;
5655
5656
case XML_ROLE_GROUP_CLOSE:
5657
quant = XML_CQUANT_NONE;
5658
goto closeGroup;
5659
case XML_ROLE_GROUP_CLOSE_OPT:
5660
quant = XML_CQUANT_OPT;
5661
goto closeGroup;
5662
case XML_ROLE_GROUP_CLOSE_REP:
5663
quant = XML_CQUANT_REP;
5664
goto closeGroup;
5665
case XML_ROLE_GROUP_CLOSE_PLUS:
5666
quant = XML_CQUANT_PLUS;
5667
closeGroup:
5668
if (dtd->in_eldecl) {
5669
if (parser->m_elementDeclHandler)
5670
handleDefault = XML_FALSE;
5671
dtd->scaffLevel--;
5672
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5673
if (dtd->scaffLevel == 0) {
5674
if (! handleDefault) {
5675
XML_Content *model = build_model(parser);
5676
if (! model)
5677
return XML_ERROR_NO_MEMORY;
5678
*eventEndPP = s;
5679
parser->m_elementDeclHandler(
5680
parser->m_handlerArg, parser->m_declElementType->name, model);
5681
}
5682
dtd->in_eldecl = XML_FALSE;
5683
dtd->contentStringLen = 0;
5684
}
5685
}
5686
break;
5687
/* End element declaration stuff */
5688
5689
case XML_ROLE_PI:
5690
if (! reportProcessingInstruction(parser, enc, s, next))
5691
return XML_ERROR_NO_MEMORY;
5692
handleDefault = XML_FALSE;
5693
break;
5694
case XML_ROLE_COMMENT:
5695
if (! reportComment(parser, enc, s, next))
5696
return XML_ERROR_NO_MEMORY;
5697
handleDefault = XML_FALSE;
5698
break;
5699
case XML_ROLE_NONE:
5700
switch (tok) {
5701
case XML_TOK_BOM:
5702
handleDefault = XML_FALSE;
5703
break;
5704
}
5705
break;
5706
case XML_ROLE_DOCTYPE_NONE:
5707
if (parser->m_startDoctypeDeclHandler)
5708
handleDefault = XML_FALSE;
5709
break;
5710
case XML_ROLE_ENTITY_NONE:
5711
if (dtd->keepProcessing && parser->m_entityDeclHandler)
5712
handleDefault = XML_FALSE;
5713
break;
5714
case XML_ROLE_NOTATION_NONE:
5715
if (parser->m_notationDeclHandler)
5716
handleDefault = XML_FALSE;
5717
break;
5718
case XML_ROLE_ATTLIST_NONE:
5719
if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5720
handleDefault = XML_FALSE;
5721
break;
5722
case XML_ROLE_ELEMENT_NONE:
5723
if (parser->m_elementDeclHandler)
5724
handleDefault = XML_FALSE;
5725
break;
5726
} /* end of big switch */
5727
5728
if (handleDefault && parser->m_defaultHandler)
5729
reportDefault(parser, enc, s, next);
5730
5731
switch (parser->m_parsingStatus.parsing) {
5732
case XML_SUSPENDED:
5733
*nextPtr = next;
5734
return XML_ERROR_NONE;
5735
case XML_FINISHED:
5736
return XML_ERROR_ABORTED;
5737
default:
5738
s = next;
5739
tok = XmlPrologTok(enc, s, end, &next);
5740
}
5741
}
5742
/* not reached */
5743
}
5744
5745
static enum XML_Error PTRCALL
5746
epilogProcessor(XML_Parser parser, const char *s, const char *end,
5747
const char **nextPtr) {
5748
parser->m_processor = epilogProcessor;
5749
parser->m_eventPtr = s;
5750
for (;;) {
5751
const char *next = NULL;
5752
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5753
#if XML_GE == 1
5754
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5755
XML_ACCOUNT_DIRECT)) {
5756
accountingOnAbort(parser);
5757
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5758
}
5759
#endif
5760
parser->m_eventEndPtr = next;
5761
switch (tok) {
5762
/* report partial linebreak - it might be the last token */
5763
case -XML_TOK_PROLOG_S:
5764
if (parser->m_defaultHandler) {
5765
reportDefault(parser, parser->m_encoding, s, next);
5766
if (parser->m_parsingStatus.parsing == XML_FINISHED)
5767
return XML_ERROR_ABORTED;
5768
}
5769
*nextPtr = next;
5770
return XML_ERROR_NONE;
5771
case XML_TOK_NONE:
5772
*nextPtr = s;
5773
return XML_ERROR_NONE;
5774
case XML_TOK_PROLOG_S:
5775
if (parser->m_defaultHandler)
5776
reportDefault(parser, parser->m_encoding, s, next);
5777
break;
5778
case XML_TOK_PI:
5779
if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5780
return XML_ERROR_NO_MEMORY;
5781
break;
5782
case XML_TOK_COMMENT:
5783
if (! reportComment(parser, parser->m_encoding, s, next))
5784
return XML_ERROR_NO_MEMORY;
5785
break;
5786
case XML_TOK_INVALID:
5787
parser->m_eventPtr = next;
5788
return XML_ERROR_INVALID_TOKEN;
5789
case XML_TOK_PARTIAL:
5790
if (! parser->m_parsingStatus.finalBuffer) {
5791
*nextPtr = s;
5792
return XML_ERROR_NONE;
5793
}
5794
return XML_ERROR_UNCLOSED_TOKEN;
5795
case XML_TOK_PARTIAL_CHAR:
5796
if (! parser->m_parsingStatus.finalBuffer) {
5797
*nextPtr = s;
5798
return XML_ERROR_NONE;
5799
}
5800
return XML_ERROR_PARTIAL_CHAR;
5801
default:
5802
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5803
}
5804
parser->m_eventPtr = s = next;
5805
switch (parser->m_parsingStatus.parsing) {
5806
case XML_SUSPENDED:
5807
*nextPtr = next;
5808
return XML_ERROR_NONE;
5809
case XML_FINISHED:
5810
return XML_ERROR_ABORTED;
5811
default:;
5812
}
5813
}
5814
}
5815
5816
static enum XML_Error
5817
processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5818
const char *textStart, *textEnd;
5819
const char *next;
5820
enum XML_Error result;
5821
OPEN_INTERNAL_ENTITY *openEntity;
5822
5823
if (parser->m_freeInternalEntities) {
5824
openEntity = parser->m_freeInternalEntities;
5825
parser->m_freeInternalEntities = openEntity->next;
5826
} else {
5827
openEntity
5828
= (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5829
if (! openEntity)
5830
return XML_ERROR_NO_MEMORY;
5831
}
5832
entity->open = XML_TRUE;
5833
#if XML_GE == 1
5834
entityTrackingOnOpen(parser, entity, __LINE__);
5835
#endif
5836
entity->processed = 0;
5837
openEntity->next = parser->m_openInternalEntities;
5838
parser->m_openInternalEntities = openEntity;
5839
openEntity->entity = entity;
5840
openEntity->startTagLevel = parser->m_tagLevel;
5841
openEntity->betweenDecl = betweenDecl;
5842
openEntity->internalEventPtr = NULL;
5843
openEntity->internalEventEndPtr = NULL;
5844
textStart = (const char *)entity->textPtr;
5845
textEnd = (const char *)(entity->textPtr + entity->textLen);
5846
/* Set a safe default value in case 'next' does not get set */
5847
next = textStart;
5848
5849
#ifdef XML_DTD
5850
if (entity->is_param) {
5851
int tok
5852
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5853
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5854
tok, next, &next, XML_FALSE, XML_FALSE,
5855
XML_ACCOUNT_ENTITY_EXPANSION);
5856
} else
5857
#endif /* XML_DTD */
5858
result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5859
textStart, textEnd, &next, XML_FALSE,
5860
XML_ACCOUNT_ENTITY_EXPANSION);
5861
5862
if (result == XML_ERROR_NONE) {
5863
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5864
entity->processed = (int)(next - textStart);
5865
parser->m_processor = internalEntityProcessor;
5866
} else if (parser->m_openInternalEntities->entity == entity) {
5867
#if XML_GE == 1
5868
entityTrackingOnClose(parser, entity, __LINE__);
5869
#endif /* XML_GE == 1 */
5870
entity->open = XML_FALSE;
5871
parser->m_openInternalEntities = openEntity->next;
5872
/* put openEntity back in list of free instances */
5873
openEntity->next = parser->m_freeInternalEntities;
5874
parser->m_freeInternalEntities = openEntity;
5875
}
5876
}
5877
return result;
5878
}
5879
5880
static enum XML_Error PTRCALL
5881
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5882
const char **nextPtr) {
5883
ENTITY *entity;
5884
const char *textStart, *textEnd;
5885
const char *next;
5886
enum XML_Error result;
5887
OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5888
if (! openEntity)
5889
return XML_ERROR_UNEXPECTED_STATE;
5890
5891
entity = openEntity->entity;
5892
textStart = ((const char *)entity->textPtr) + entity->processed;
5893
textEnd = (const char *)(entity->textPtr + entity->textLen);
5894
/* Set a safe default value in case 'next' does not get set */
5895
next = textStart;
5896
5897
#ifdef XML_DTD
5898
if (entity->is_param) {
5899
int tok
5900
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5901
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5902
tok, next, &next, XML_FALSE, XML_TRUE,
5903
XML_ACCOUNT_ENTITY_EXPANSION);
5904
} else
5905
#endif /* XML_DTD */
5906
result = doContent(parser, openEntity->startTagLevel,
5907
parser->m_internalEncoding, textStart, textEnd, &next,
5908
XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5909
5910
if (result != XML_ERROR_NONE)
5911
return result;
5912
5913
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5914
entity->processed = (int)(next - (const char *)entity->textPtr);
5915
return result;
5916
}
5917
5918
#if XML_GE == 1
5919
entityTrackingOnClose(parser, entity, __LINE__);
5920
#endif
5921
entity->open = XML_FALSE;
5922
parser->m_openInternalEntities = openEntity->next;
5923
/* put openEntity back in list of free instances */
5924
openEntity->next = parser->m_freeInternalEntities;
5925
parser->m_freeInternalEntities = openEntity;
5926
5927
// If there are more open entities we want to stop right here and have the
5928
// upcoming call to XML_ResumeParser continue with entity content, or it would
5929
// be ignored altogether.
5930
if (parser->m_openInternalEntities != NULL
5931
&& parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5932
return XML_ERROR_NONE;
5933
}
5934
5935
#ifdef XML_DTD
5936
if (entity->is_param) {
5937
int tok;
5938
parser->m_processor = prologProcessor;
5939
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5940
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5941
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5942
XML_ACCOUNT_DIRECT);
5943
} else
5944
#endif /* XML_DTD */
5945
{
5946
parser->m_processor = contentProcessor;
5947
/* see externalEntityContentProcessor vs contentProcessor */
5948
result = doContent(parser, parser->m_parentParser ? 1 : 0,
5949
parser->m_encoding, s, end, nextPtr,
5950
(XML_Bool)! parser->m_parsingStatus.finalBuffer,
5951
XML_ACCOUNT_DIRECT);
5952
if (result == XML_ERROR_NONE) {
5953
if (! storeRawNames(parser))
5954
return XML_ERROR_NO_MEMORY;
5955
}
5956
return result;
5957
}
5958
}
5959
5960
static enum XML_Error PTRCALL
5961
errorProcessor(XML_Parser parser, const char *s, const char *end,
5962
const char **nextPtr) {
5963
UNUSED_P(s);
5964
UNUSED_P(end);
5965
UNUSED_P(nextPtr);
5966
return parser->m_errorCode;
5967
}
5968
5969
static enum XML_Error
5970
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5971
const char *ptr, const char *end, STRING_POOL *pool,
5972
enum XML_Account account) {
5973
enum XML_Error result
5974
= appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5975
if (result)
5976
return result;
5977
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5978
poolChop(pool);
5979
if (! poolAppendChar(pool, XML_T('\0')))
5980
return XML_ERROR_NO_MEMORY;
5981
return XML_ERROR_NONE;
5982
}
5983
5984
static enum XML_Error
5985
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5986
const char *ptr, const char *end, STRING_POOL *pool,
5987
enum XML_Account account) {
5988
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5989
#ifndef XML_DTD
5990
UNUSED_P(account);
5991
#endif
5992
5993
for (;;) {
5994
const char *next
5995
= ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5996
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5997
#if XML_GE == 1
5998
if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5999
accountingOnAbort(parser);
6000
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6001
}
6002
#endif
6003
switch (tok) {
6004
case XML_TOK_NONE:
6005
return XML_ERROR_NONE;
6006
case XML_TOK_INVALID:
6007
if (enc == parser->m_encoding)
6008
parser->m_eventPtr = next;
6009
return XML_ERROR_INVALID_TOKEN;
6010
case XML_TOK_PARTIAL:
6011
if (enc == parser->m_encoding)
6012
parser->m_eventPtr = ptr;
6013
return XML_ERROR_INVALID_TOKEN;
6014
case XML_TOK_CHAR_REF: {
6015
XML_Char buf[XML_ENCODE_MAX];
6016
int i;
6017
int n = XmlCharRefNumber(enc, ptr);
6018
if (n < 0) {
6019
if (enc == parser->m_encoding)
6020
parser->m_eventPtr = ptr;
6021
return XML_ERROR_BAD_CHAR_REF;
6022
}
6023
if (! isCdata && n == 0x20 /* space */
6024
&& (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6025
break;
6026
n = XmlEncode(n, (ICHAR *)buf);
6027
/* The XmlEncode() functions can never return 0 here. That
6028
* error return happens if the code point passed in is either
6029
* negative or greater than or equal to 0x110000. The
6030
* XmlCharRefNumber() functions will all return a number
6031
* strictly less than 0x110000 or a negative value if an error
6032
* occurred. The negative value is intercepted above, so
6033
* XmlEncode() is never passed a value it might return an
6034
* error for.
6035
*/
6036
for (i = 0; i < n; i++) {
6037
if (! poolAppendChar(pool, buf[i]))
6038
return XML_ERROR_NO_MEMORY;
6039
}
6040
} break;
6041
case XML_TOK_DATA_CHARS:
6042
if (! poolAppend(pool, enc, ptr, next))
6043
return XML_ERROR_NO_MEMORY;
6044
break;
6045
case XML_TOK_TRAILING_CR:
6046
next = ptr + enc->minBytesPerChar;
6047
/* fall through */
6048
case XML_TOK_ATTRIBUTE_VALUE_S:
6049
case XML_TOK_DATA_NEWLINE:
6050
if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6051
break;
6052
if (! poolAppendChar(pool, 0x20))
6053
return XML_ERROR_NO_MEMORY;
6054
break;
6055
case XML_TOK_ENTITY_REF: {
6056
const XML_Char *name;
6057
ENTITY *entity;
6058
char checkEntityDecl;
6059
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6060
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6061
if (ch) {
6062
#if XML_GE == 1
6063
/* NOTE: We are replacing 4-6 characters original input for 1 character
6064
* so there is no amplification and hence recording without
6065
* protection. */
6066
accountingDiffTolerated(parser, tok, (char *)&ch,
6067
((char *)&ch) + sizeof(XML_Char), __LINE__,
6068
XML_ACCOUNT_ENTITY_EXPANSION);
6069
#endif /* XML_GE == 1 */
6070
if (! poolAppendChar(pool, ch))
6071
return XML_ERROR_NO_MEMORY;
6072
break;
6073
}
6074
name = poolStoreString(&parser->m_temp2Pool, enc,
6075
ptr + enc->minBytesPerChar,
6076
next - enc->minBytesPerChar);
6077
if (! name)
6078
return XML_ERROR_NO_MEMORY;
6079
entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6080
poolDiscard(&parser->m_temp2Pool);
6081
/* First, determine if a check for an existing declaration is needed;
6082
if yes, check that the entity exists, and that it is internal.
6083
*/
6084
if (pool == &dtd->pool) /* are we called from prolog? */
6085
checkEntityDecl =
6086
#ifdef XML_DTD
6087
parser->m_prologState.documentEntity &&
6088
#endif /* XML_DTD */
6089
(dtd->standalone ? ! parser->m_openInternalEntities
6090
: ! dtd->hasParamEntityRefs);
6091
else /* if (pool == &parser->m_tempPool): we are called from content */
6092
checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6093
if (checkEntityDecl) {
6094
if (! entity)
6095
return XML_ERROR_UNDEFINED_ENTITY;
6096
else if (! entity->is_internal)
6097
return XML_ERROR_ENTITY_DECLARED_IN_PE;
6098
} else if (! entity) {
6099
/* Cannot report skipped entity here - see comments on
6100
parser->m_skippedEntityHandler.
6101
if (parser->m_skippedEntityHandler)
6102
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6103
*/
6104
/* Cannot call the default handler because this would be
6105
out of sync with the call to the startElementHandler.
6106
if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6107
reportDefault(parser, enc, ptr, next);
6108
*/
6109
break;
6110
}
6111
if (entity->open) {
6112
if (enc == parser->m_encoding) {
6113
/* It does not appear that this line can be executed.
6114
*
6115
* The "if (entity->open)" check catches recursive entity
6116
* definitions. In order to be called with an open
6117
* entity, it must have gone through this code before and
6118
* been through the recursive call to
6119
* appendAttributeValue() some lines below. That call
6120
* sets the local encoding ("enc") to the parser's
6121
* internal encoding (internal_utf8 or internal_utf16),
6122
* which can never be the same as the principle encoding.
6123
* It doesn't appear there is another code path that gets
6124
* here with entity->open being TRUE.
6125
*
6126
* Since it is not certain that this logic is watertight,
6127
* we keep the line and merely exclude it from coverage
6128
* tests.
6129
*/
6130
parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6131
}
6132
return XML_ERROR_RECURSIVE_ENTITY_REF;
6133
}
6134
if (entity->notation) {
6135
if (enc == parser->m_encoding)
6136
parser->m_eventPtr = ptr;
6137
return XML_ERROR_BINARY_ENTITY_REF;
6138
}
6139
if (! entity->textPtr) {
6140
if (enc == parser->m_encoding)
6141
parser->m_eventPtr = ptr;
6142
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6143
} else {
6144
enum XML_Error result;
6145
const XML_Char *textEnd = entity->textPtr + entity->textLen;
6146
entity->open = XML_TRUE;
6147
#if XML_GE == 1
6148
entityTrackingOnOpen(parser, entity, __LINE__);
6149
#endif
6150
result = appendAttributeValue(parser, parser->m_internalEncoding,
6151
isCdata, (const char *)entity->textPtr,
6152
(const char *)textEnd, pool,
6153
XML_ACCOUNT_ENTITY_EXPANSION);
6154
#if XML_GE == 1
6155
entityTrackingOnClose(parser, entity, __LINE__);
6156
#endif
6157
entity->open = XML_FALSE;
6158
if (result)
6159
return result;
6160
}
6161
} break;
6162
default:
6163
/* The only token returned by XmlAttributeValueTok() that does
6164
* not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6165
* Getting that would require an entity name to contain an
6166
* incomplete XML character (e.g. \xE2\x82); however previous
6167
* tokenisers will have already recognised and rejected such
6168
* names before XmlAttributeValueTok() gets a look-in. This
6169
* default case should be retained as a safety net, but the code
6170
* excluded from coverage tests.
6171
*
6172
* LCOV_EXCL_START
6173
*/
6174
if (enc == parser->m_encoding)
6175
parser->m_eventPtr = ptr;
6176
return XML_ERROR_UNEXPECTED_STATE;
6177
/* LCOV_EXCL_STOP */
6178
}
6179
ptr = next;
6180
}
6181
/* not reached */
6182
}
6183
6184
#if XML_GE == 1
6185
static enum XML_Error
6186
storeEntityValue(XML_Parser parser, const ENCODING *enc,
6187
const char *entityTextPtr, const char *entityTextEnd,
6188
enum XML_Account account) {
6189
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6190
STRING_POOL *pool = &(dtd->entityValuePool);
6191
enum XML_Error result = XML_ERROR_NONE;
6192
# ifdef XML_DTD
6193
int oldInEntityValue = parser->m_prologState.inEntityValue;
6194
parser->m_prologState.inEntityValue = 1;
6195
# else
6196
UNUSED_P(account);
6197
# endif /* XML_DTD */
6198
/* never return Null for the value argument in EntityDeclHandler,
6199
since this would indicate an external entity; therefore we
6200
have to make sure that entityValuePool.start is not null */
6201
if (! pool->blocks) {
6202
if (! poolGrow(pool))
6203
return XML_ERROR_NO_MEMORY;
6204
}
6205
6206
for (;;) {
6207
const char *next
6208
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6209
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6210
6211
if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6212
account)) {
6213
accountingOnAbort(parser);
6214
result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6215
goto endEntityValue;
6216
}
6217
6218
switch (tok) {
6219
case XML_TOK_PARAM_ENTITY_REF:
6220
# ifdef XML_DTD
6221
if (parser->m_isParamEntity || enc != parser->m_encoding) {
6222
const XML_Char *name;
6223
ENTITY *entity;
6224
name = poolStoreString(&parser->m_tempPool, enc,
6225
entityTextPtr + enc->minBytesPerChar,
6226
next - enc->minBytesPerChar);
6227
if (! name) {
6228
result = XML_ERROR_NO_MEMORY;
6229
goto endEntityValue;
6230
}
6231
entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6232
poolDiscard(&parser->m_tempPool);
6233
if (! entity) {
6234
/* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6235
/* cannot report skipped entity here - see comments on
6236
parser->m_skippedEntityHandler
6237
if (parser->m_skippedEntityHandler)
6238
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6239
*/
6240
dtd->keepProcessing = dtd->standalone;
6241
goto endEntityValue;
6242
}
6243
if (entity->open || (entity == parser->m_declEntity)) {
6244
if (enc == parser->m_encoding)
6245
parser->m_eventPtr = entityTextPtr;
6246
result = XML_ERROR_RECURSIVE_ENTITY_REF;
6247
goto endEntityValue;
6248
}
6249
if (entity->systemId) {
6250
if (parser->m_externalEntityRefHandler) {
6251
dtd->paramEntityRead = XML_FALSE;
6252
entity->open = XML_TRUE;
6253
entityTrackingOnOpen(parser, entity, __LINE__);
6254
if (! parser->m_externalEntityRefHandler(
6255
parser->m_externalEntityRefHandlerArg, 0, entity->base,
6256
entity->systemId, entity->publicId)) {
6257
entityTrackingOnClose(parser, entity, __LINE__);
6258
entity->open = XML_FALSE;
6259
result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6260
goto endEntityValue;
6261
}
6262
entityTrackingOnClose(parser, entity, __LINE__);
6263
entity->open = XML_FALSE;
6264
if (! dtd->paramEntityRead)
6265
dtd->keepProcessing = dtd->standalone;
6266
} else
6267
dtd->keepProcessing = dtd->standalone;
6268
} else {
6269
entity->open = XML_TRUE;
6270
entityTrackingOnOpen(parser, entity, __LINE__);
6271
result = storeEntityValue(
6272
parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6273
(const char *)(entity->textPtr + entity->textLen),
6274
XML_ACCOUNT_ENTITY_EXPANSION);
6275
entityTrackingOnClose(parser, entity, __LINE__);
6276
entity->open = XML_FALSE;
6277
if (result)
6278
goto endEntityValue;
6279
}
6280
break;
6281
}
6282
# endif /* XML_DTD */
6283
/* In the internal subset, PE references are not legal
6284
within markup declarations, e.g entity values in this case. */
6285
parser->m_eventPtr = entityTextPtr;
6286
result = XML_ERROR_PARAM_ENTITY_REF;
6287
goto endEntityValue;
6288
case XML_TOK_NONE:
6289
result = XML_ERROR_NONE;
6290
goto endEntityValue;
6291
case XML_TOK_ENTITY_REF:
6292
case XML_TOK_DATA_CHARS:
6293
if (! poolAppend(pool, enc, entityTextPtr, next)) {
6294
result = XML_ERROR_NO_MEMORY;
6295
goto endEntityValue;
6296
}
6297
break;
6298
case XML_TOK_TRAILING_CR:
6299
next = entityTextPtr + enc->minBytesPerChar;
6300
/* fall through */
6301
case XML_TOK_DATA_NEWLINE:
6302
if (pool->end == pool->ptr && ! poolGrow(pool)) {
6303
result = XML_ERROR_NO_MEMORY;
6304
goto endEntityValue;
6305
}
6306
*(pool->ptr)++ = 0xA;
6307
break;
6308
case XML_TOK_CHAR_REF: {
6309
XML_Char buf[XML_ENCODE_MAX];
6310
int i;
6311
int n = XmlCharRefNumber(enc, entityTextPtr);
6312
if (n < 0) {
6313
if (enc == parser->m_encoding)
6314
parser->m_eventPtr = entityTextPtr;
6315
result = XML_ERROR_BAD_CHAR_REF;
6316
goto endEntityValue;
6317
}
6318
n = XmlEncode(n, (ICHAR *)buf);
6319
/* The XmlEncode() functions can never return 0 here. That
6320
* error return happens if the code point passed in is either
6321
* negative or greater than or equal to 0x110000. The
6322
* XmlCharRefNumber() functions will all return a number
6323
* strictly less than 0x110000 or a negative value if an error
6324
* occurred. The negative value is intercepted above, so
6325
* XmlEncode() is never passed a value it might return an
6326
* error for.
6327
*/
6328
for (i = 0; i < n; i++) {
6329
if (pool->end == pool->ptr && ! poolGrow(pool)) {
6330
result = XML_ERROR_NO_MEMORY;
6331
goto endEntityValue;
6332
}
6333
*(pool->ptr)++ = buf[i];
6334
}
6335
} break;
6336
case XML_TOK_PARTIAL:
6337
if (enc == parser->m_encoding)
6338
parser->m_eventPtr = entityTextPtr;
6339
result = XML_ERROR_INVALID_TOKEN;
6340
goto endEntityValue;
6341
case XML_TOK_INVALID:
6342
if (enc == parser->m_encoding)
6343
parser->m_eventPtr = next;
6344
result = XML_ERROR_INVALID_TOKEN;
6345
goto endEntityValue;
6346
default:
6347
/* This default case should be unnecessary -- all the tokens
6348
* that XmlEntityValueTok() can return have their own explicit
6349
* cases -- but should be retained for safety. We do however
6350
* exclude it from the coverage statistics.
6351
*
6352
* LCOV_EXCL_START
6353
*/
6354
if (enc == parser->m_encoding)
6355
parser->m_eventPtr = entityTextPtr;
6356
result = XML_ERROR_UNEXPECTED_STATE;
6357
goto endEntityValue;
6358
/* LCOV_EXCL_STOP */
6359
}
6360
entityTextPtr = next;
6361
}
6362
endEntityValue:
6363
# ifdef XML_DTD
6364
parser->m_prologState.inEntityValue = oldInEntityValue;
6365
# endif /* XML_DTD */
6366
return result;
6367
}
6368
6369
#else /* XML_GE == 0 */
6370
6371
static enum XML_Error
6372
storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6373
// This will store "&amp;entity123;" in entity->textPtr
6374
// to end up as "&entity123;" in the handler.
6375
const char *const entity_start = "&amp;";
6376
const char *const entity_end = ";";
6377
6378
STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6379
if (! poolAppendString(pool, entity_start)
6380
|| ! poolAppendString(pool, entity->name)
6381
|| ! poolAppendString(pool, entity_end)) {
6382
poolDiscard(pool);
6383
return XML_ERROR_NO_MEMORY;
6384
}
6385
6386
entity->textPtr = poolStart(pool);
6387
entity->textLen = (int)(poolLength(pool));
6388
poolFinish(pool);
6389
6390
return XML_ERROR_NONE;
6391
}
6392
6393
#endif /* XML_GE == 0 */
6394
6395
static void FASTCALL
6396
normalizeLines(XML_Char *s) {
6397
XML_Char *p;
6398
for (;; s++) {
6399
if (*s == XML_T('\0'))
6400
return;
6401
if (*s == 0xD)
6402
break;
6403
}
6404
p = s;
6405
do {
6406
if (*s == 0xD) {
6407
*p++ = 0xA;
6408
if (*++s == 0xA)
6409
s++;
6410
} else
6411
*p++ = *s++;
6412
} while (*s);
6413
*p = XML_T('\0');
6414
}
6415
6416
static int
6417
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6418
const char *start, const char *end) {
6419
const XML_Char *target;
6420
XML_Char *data;
6421
const char *tem;
6422
if (! parser->m_processingInstructionHandler) {
6423
if (parser->m_defaultHandler)
6424
reportDefault(parser, enc, start, end);
6425
return 1;
6426
}
6427
start += enc->minBytesPerChar * 2;
6428
tem = start + XmlNameLength(enc, start);
6429
target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6430
if (! target)
6431
return 0;
6432
poolFinish(&parser->m_tempPool);
6433
data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6434
end - enc->minBytesPerChar * 2);
6435
if (! data)
6436
return 0;
6437
normalizeLines(data);
6438
parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6439
poolClear(&parser->m_tempPool);
6440
return 1;
6441
}
6442
6443
static int
6444
reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6445
const char *end) {
6446
XML_Char *data;
6447
if (! parser->m_commentHandler) {
6448
if (parser->m_defaultHandler)
6449
reportDefault(parser, enc, start, end);
6450
return 1;
6451
}
6452
data = poolStoreString(&parser->m_tempPool, enc,
6453
start + enc->minBytesPerChar * 4,
6454
end - enc->minBytesPerChar * 3);
6455
if (! data)
6456
return 0;
6457
normalizeLines(data);
6458
parser->m_commentHandler(parser->m_handlerArg, data);
6459
poolClear(&parser->m_tempPool);
6460
return 1;
6461
}
6462
6463
static void
6464
reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6465
const char *end) {
6466
if (MUST_CONVERT(enc, s)) {
6467
enum XML_Convert_Result convert_res;
6468
const char **eventPP;
6469
const char **eventEndPP;
6470
if (enc == parser->m_encoding) {
6471
eventPP = &parser->m_eventPtr;
6472
eventEndPP = &parser->m_eventEndPtr;
6473
} else {
6474
/* To get here, two things must be true; the parser must be
6475
* using a character encoding that is not the same as the
6476
* encoding passed in, and the encoding passed in must need
6477
* conversion to the internal format (UTF-8 unless XML_UNICODE
6478
* is defined). The only occasions on which the encoding passed
6479
* in is not the same as the parser's encoding are when it is
6480
* the internal encoding (e.g. a previously defined parameter
6481
* entity, already converted to internal format). This by
6482
* definition doesn't need conversion, so the whole branch never
6483
* gets executed.
6484
*
6485
* For safety's sake we don't delete these lines and merely
6486
* exclude them from coverage statistics.
6487
*
6488
* LCOV_EXCL_START
6489
*/
6490
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6491
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6492
/* LCOV_EXCL_STOP */
6493
}
6494
do {
6495
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6496
convert_res
6497
= XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6498
*eventEndPP = s;
6499
parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6500
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6501
*eventPP = s;
6502
} while ((convert_res != XML_CONVERT_COMPLETED)
6503
&& (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6504
} else
6505
parser->m_defaultHandler(
6506
parser->m_handlerArg, (const XML_Char *)s,
6507
(int)((const XML_Char *)end - (const XML_Char *)s));
6508
}
6509
6510
static int
6511
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6512
XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6513
DEFAULT_ATTRIBUTE *att;
6514
if (value || isId) {
6515
/* The handling of default attributes gets messed up if we have
6516
a default which duplicates a non-default. */
6517
int i;
6518
for (i = 0; i < type->nDefaultAtts; i++)
6519
if (attId == type->defaultAtts[i].id)
6520
return 1;
6521
if (isId && ! type->idAtt && ! attId->xmlns)
6522
type->idAtt = attId;
6523
}
6524
if (type->nDefaultAtts == type->allocDefaultAtts) {
6525
if (type->allocDefaultAtts == 0) {
6526
type->allocDefaultAtts = 8;
6527
type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6528
parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6529
if (! type->defaultAtts) {
6530
type->allocDefaultAtts = 0;
6531
return 0;
6532
}
6533
} else {
6534
DEFAULT_ATTRIBUTE *temp;
6535
6536
/* Detect and prevent integer overflow */
6537
if (type->allocDefaultAtts > INT_MAX / 2) {
6538
return 0;
6539
}
6540
6541
int count = type->allocDefaultAtts * 2;
6542
6543
/* Detect and prevent integer overflow.
6544
* The preprocessor guard addresses the "always false" warning
6545
* from -Wtype-limits on platforms where
6546
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6547
#if UINT_MAX >= SIZE_MAX
6548
if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6549
return 0;
6550
}
6551
#endif
6552
6553
temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6554
(count * sizeof(DEFAULT_ATTRIBUTE)));
6555
if (temp == NULL)
6556
return 0;
6557
type->allocDefaultAtts = count;
6558
type->defaultAtts = temp;
6559
}
6560
}
6561
att = type->defaultAtts + type->nDefaultAtts;
6562
att->id = attId;
6563
att->value = value;
6564
att->isCdata = isCdata;
6565
if (! isCdata)
6566
attId->maybeTokenized = XML_TRUE;
6567
type->nDefaultAtts += 1;
6568
return 1;
6569
}
6570
6571
static int
6572
setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6573
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6574
const XML_Char *name;
6575
for (name = elementType->name; *name; name++) {
6576
if (*name == XML_T(ASCII_COLON)) {
6577
PREFIX *prefix;
6578
const XML_Char *s;
6579
for (s = elementType->name; s != name; s++) {
6580
if (! poolAppendChar(&dtd->pool, *s))
6581
return 0;
6582
}
6583
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6584
return 0;
6585
prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6586
sizeof(PREFIX));
6587
if (! prefix)
6588
return 0;
6589
if (prefix->name == poolStart(&dtd->pool))
6590
poolFinish(&dtd->pool);
6591
else
6592
poolDiscard(&dtd->pool);
6593
elementType->prefix = prefix;
6594
break;
6595
}
6596
}
6597
return 1;
6598
}
6599
6600
static ATTRIBUTE_ID *
6601
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6602
const char *end) {
6603
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6604
ATTRIBUTE_ID *id;
6605
const XML_Char *name;
6606
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6607
return NULL;
6608
name = poolStoreString(&dtd->pool, enc, start, end);
6609
if (! name)
6610
return NULL;
6611
/* skip quotation mark - its storage will be reused (like in name[-1]) */
6612
++name;
6613
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6614
sizeof(ATTRIBUTE_ID));
6615
if (! id)
6616
return NULL;
6617
if (id->name != name)
6618
poolDiscard(&dtd->pool);
6619
else {
6620
poolFinish(&dtd->pool);
6621
if (! parser->m_ns)
6622
;
6623
else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6624
&& name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6625
&& name[4] == XML_T(ASCII_s)
6626
&& (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6627
if (name[5] == XML_T('\0'))
6628
id->prefix = &dtd->defaultPrefix;
6629
else
6630
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6631
sizeof(PREFIX));
6632
id->xmlns = XML_TRUE;
6633
} else {
6634
int i;
6635
for (i = 0; name[i]; i++) {
6636
/* attributes without prefix are *not* in the default namespace */
6637
if (name[i] == XML_T(ASCII_COLON)) {
6638
int j;
6639
for (j = 0; j < i; j++) {
6640
if (! poolAppendChar(&dtd->pool, name[j]))
6641
return NULL;
6642
}
6643
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6644
return NULL;
6645
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6646
poolStart(&dtd->pool), sizeof(PREFIX));
6647
if (! id->prefix)
6648
return NULL;
6649
if (id->prefix->name == poolStart(&dtd->pool))
6650
poolFinish(&dtd->pool);
6651
else
6652
poolDiscard(&dtd->pool);
6653
break;
6654
}
6655
}
6656
}
6657
}
6658
return id;
6659
}
6660
6661
#define CONTEXT_SEP XML_T(ASCII_FF)
6662
6663
static const XML_Char *
6664
getContext(XML_Parser parser) {
6665
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6666
HASH_TABLE_ITER iter;
6667
XML_Bool needSep = XML_FALSE;
6668
6669
if (dtd->defaultPrefix.binding) {
6670
int i;
6671
int len;
6672
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6673
return NULL;
6674
len = dtd->defaultPrefix.binding->uriLen;
6675
if (parser->m_namespaceSeparator)
6676
len--;
6677
for (i = 0; i < len; i++) {
6678
if (! poolAppendChar(&parser->m_tempPool,
6679
dtd->defaultPrefix.binding->uri[i])) {
6680
/* Because of memory caching, I don't believe this line can be
6681
* executed.
6682
*
6683
* This is part of a loop copying the default prefix binding
6684
* URI into the parser's temporary string pool. Previously,
6685
* that URI was copied into the same string pool, with a
6686
* terminating NUL character, as part of setContext(). When
6687
* the pool was cleared, that leaves a block definitely big
6688
* enough to hold the URI on the free block list of the pool.
6689
* The URI copy in getContext() therefore cannot run out of
6690
* memory.
6691
*
6692
* If the pool is used between the setContext() and
6693
* getContext() calls, the worst it can do is leave a bigger
6694
* block on the front of the free list. Given that this is
6695
* all somewhat inobvious and program logic can be changed, we
6696
* don't delete the line but we do exclude it from the test
6697
* coverage statistics.
6698
*/
6699
return NULL; /* LCOV_EXCL_LINE */
6700
}
6701
}
6702
needSep = XML_TRUE;
6703
}
6704
6705
hashTableIterInit(&iter, &(dtd->prefixes));
6706
for (;;) {
6707
int i;
6708
int len;
6709
const XML_Char *s;
6710
PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6711
if (! prefix)
6712
break;
6713
if (! prefix->binding) {
6714
/* This test appears to be (justifiable) paranoia. There does
6715
* not seem to be a way of injecting a prefix without a binding
6716
* that doesn't get errored long before this function is called.
6717
* The test should remain for safety's sake, so we instead
6718
* exclude the following line from the coverage statistics.
6719
*/
6720
continue; /* LCOV_EXCL_LINE */
6721
}
6722
if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6723
return NULL;
6724
for (s = prefix->name; *s; s++)
6725
if (! poolAppendChar(&parser->m_tempPool, *s))
6726
return NULL;
6727
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6728
return NULL;
6729
len = prefix->binding->uriLen;
6730
if (parser->m_namespaceSeparator)
6731
len--;
6732
for (i = 0; i < len; i++)
6733
if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6734
return NULL;
6735
needSep = XML_TRUE;
6736
}
6737
6738
hashTableIterInit(&iter, &(dtd->generalEntities));
6739
for (;;) {
6740
const XML_Char *s;
6741
ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6742
if (! e)
6743
break;
6744
if (! e->open)
6745
continue;
6746
if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6747
return NULL;
6748
for (s = e->name; *s; s++)
6749
if (! poolAppendChar(&parser->m_tempPool, *s))
6750
return 0;
6751
needSep = XML_TRUE;
6752
}
6753
6754
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6755
return NULL;
6756
return parser->m_tempPool.start;
6757
}
6758
6759
static XML_Bool
6760
setContext(XML_Parser parser, const XML_Char *context) {
6761
if (context == NULL) {
6762
return XML_FALSE;
6763
}
6764
6765
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6766
const XML_Char *s = context;
6767
6768
while (*context != XML_T('\0')) {
6769
if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6770
ENTITY *e;
6771
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6772
return XML_FALSE;
6773
e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6774
poolStart(&parser->m_tempPool), 0);
6775
if (e)
6776
e->open = XML_TRUE;
6777
if (*s != XML_T('\0'))
6778
s++;
6779
context = s;
6780
poolDiscard(&parser->m_tempPool);
6781
} else if (*s == XML_T(ASCII_EQUALS)) {
6782
PREFIX *prefix;
6783
if (poolLength(&parser->m_tempPool) == 0)
6784
prefix = &dtd->defaultPrefix;
6785
else {
6786
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6787
return XML_FALSE;
6788
prefix
6789
= (PREFIX *)lookup(parser, &dtd->prefixes,
6790
poolStart(&parser->m_tempPool), sizeof(PREFIX));
6791
if (! prefix)
6792
return XML_FALSE;
6793
if (prefix->name == poolStart(&parser->m_tempPool)) {
6794
prefix->name = poolCopyString(&dtd->pool, prefix->name);
6795
if (! prefix->name)
6796
return XML_FALSE;
6797
}
6798
poolDiscard(&parser->m_tempPool);
6799
}
6800
for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6801
context++)
6802
if (! poolAppendChar(&parser->m_tempPool, *context))
6803
return XML_FALSE;
6804
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6805
return XML_FALSE;
6806
if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6807
&parser->m_inheritedBindings)
6808
!= XML_ERROR_NONE)
6809
return XML_FALSE;
6810
poolDiscard(&parser->m_tempPool);
6811
if (*context != XML_T('\0'))
6812
++context;
6813
s = context;
6814
} else {
6815
if (! poolAppendChar(&parser->m_tempPool, *s))
6816
return XML_FALSE;
6817
s++;
6818
}
6819
}
6820
return XML_TRUE;
6821
}
6822
6823
static void FASTCALL
6824
normalizePublicId(XML_Char *publicId) {
6825
XML_Char *p = publicId;
6826
XML_Char *s;
6827
for (s = publicId; *s; s++) {
6828
switch (*s) {
6829
case 0x20:
6830
case 0xD:
6831
case 0xA:
6832
if (p != publicId && p[-1] != 0x20)
6833
*p++ = 0x20;
6834
break;
6835
default:
6836
*p++ = *s;
6837
}
6838
}
6839
if (p != publicId && p[-1] == 0x20)
6840
--p;
6841
*p = XML_T('\0');
6842
}
6843
6844
static DTD *
6845
dtdCreate(const XML_Memory_Handling_Suite *ms) {
6846
DTD *p = ms->malloc_fcn(sizeof(DTD));
6847
if (p == NULL)
6848
return p;
6849
poolInit(&(p->pool), ms);
6850
poolInit(&(p->entityValuePool), ms);
6851
hashTableInit(&(p->generalEntities), ms);
6852
hashTableInit(&(p->elementTypes), ms);
6853
hashTableInit(&(p->attributeIds), ms);
6854
hashTableInit(&(p->prefixes), ms);
6855
#ifdef XML_DTD
6856
p->paramEntityRead = XML_FALSE;
6857
hashTableInit(&(p->paramEntities), ms);
6858
#endif /* XML_DTD */
6859
p->defaultPrefix.name = NULL;
6860
p->defaultPrefix.binding = NULL;
6861
6862
p->in_eldecl = XML_FALSE;
6863
p->scaffIndex = NULL;
6864
p->scaffold = NULL;
6865
p->scaffLevel = 0;
6866
p->scaffSize = 0;
6867
p->scaffCount = 0;
6868
p->contentStringLen = 0;
6869
6870
p->keepProcessing = XML_TRUE;
6871
p->hasParamEntityRefs = XML_FALSE;
6872
p->standalone = XML_FALSE;
6873
return p;
6874
}
6875
6876
static void
6877
dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6878
HASH_TABLE_ITER iter;
6879
hashTableIterInit(&iter, &(p->elementTypes));
6880
for (;;) {
6881
ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6882
if (! e)
6883
break;
6884
if (e->allocDefaultAtts != 0)
6885
ms->free_fcn(e->defaultAtts);
6886
}
6887
hashTableClear(&(p->generalEntities));
6888
#ifdef XML_DTD
6889
p->paramEntityRead = XML_FALSE;
6890
hashTableClear(&(p->paramEntities));
6891
#endif /* XML_DTD */
6892
hashTableClear(&(p->elementTypes));
6893
hashTableClear(&(p->attributeIds));
6894
hashTableClear(&(p->prefixes));
6895
poolClear(&(p->pool));
6896
poolClear(&(p->entityValuePool));
6897
p->defaultPrefix.name = NULL;
6898
p->defaultPrefix.binding = NULL;
6899
6900
p->in_eldecl = XML_FALSE;
6901
6902
ms->free_fcn(p->scaffIndex);
6903
p->scaffIndex = NULL;
6904
ms->free_fcn(p->scaffold);
6905
p->scaffold = NULL;
6906
6907
p->scaffLevel = 0;
6908
p->scaffSize = 0;
6909
p->scaffCount = 0;
6910
p->contentStringLen = 0;
6911
6912
p->keepProcessing = XML_TRUE;
6913
p->hasParamEntityRefs = XML_FALSE;
6914
p->standalone = XML_FALSE;
6915
}
6916
6917
static void
6918
dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6919
HASH_TABLE_ITER iter;
6920
hashTableIterInit(&iter, &(p->elementTypes));
6921
for (;;) {
6922
ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6923
if (! e)
6924
break;
6925
if (e->allocDefaultAtts != 0)
6926
ms->free_fcn(e->defaultAtts);
6927
}
6928
hashTableDestroy(&(p->generalEntities));
6929
#ifdef XML_DTD
6930
hashTableDestroy(&(p->paramEntities));
6931
#endif /* XML_DTD */
6932
hashTableDestroy(&(p->elementTypes));
6933
hashTableDestroy(&(p->attributeIds));
6934
hashTableDestroy(&(p->prefixes));
6935
poolDestroy(&(p->pool));
6936
poolDestroy(&(p->entityValuePool));
6937
if (isDocEntity) {
6938
ms->free_fcn(p->scaffIndex);
6939
ms->free_fcn(p->scaffold);
6940
}
6941
ms->free_fcn(p);
6942
}
6943
6944
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6945
The new DTD has already been initialized.
6946
*/
6947
static int
6948
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6949
const XML_Memory_Handling_Suite *ms) {
6950
HASH_TABLE_ITER iter;
6951
6952
/* Copy the prefix table. */
6953
6954
hashTableIterInit(&iter, &(oldDtd->prefixes));
6955
for (;;) {
6956
const XML_Char *name;
6957
const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6958
if (! oldP)
6959
break;
6960
name = poolCopyString(&(newDtd->pool), oldP->name);
6961
if (! name)
6962
return 0;
6963
if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6964
return 0;
6965
}
6966
6967
hashTableIterInit(&iter, &(oldDtd->attributeIds));
6968
6969
/* Copy the attribute id table. */
6970
6971
for (;;) {
6972
ATTRIBUTE_ID *newA;
6973
const XML_Char *name;
6974
const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6975
6976
if (! oldA)
6977
break;
6978
/* Remember to allocate the scratch byte before the name. */
6979
if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6980
return 0;
6981
name = poolCopyString(&(newDtd->pool), oldA->name);
6982
if (! name)
6983
return 0;
6984
++name;
6985
newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6986
sizeof(ATTRIBUTE_ID));
6987
if (! newA)
6988
return 0;
6989
newA->maybeTokenized = oldA->maybeTokenized;
6990
if (oldA->prefix) {
6991
newA->xmlns = oldA->xmlns;
6992
if (oldA->prefix == &oldDtd->defaultPrefix)
6993
newA->prefix = &newDtd->defaultPrefix;
6994
else
6995
newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6996
oldA->prefix->name, 0);
6997
}
6998
}
6999
7000
/* Copy the element type table. */
7001
7002
hashTableIterInit(&iter, &(oldDtd->elementTypes));
7003
7004
for (;;) {
7005
int i;
7006
ELEMENT_TYPE *newE;
7007
const XML_Char *name;
7008
const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7009
if (! oldE)
7010
break;
7011
name = poolCopyString(&(newDtd->pool), oldE->name);
7012
if (! name)
7013
return 0;
7014
newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7015
sizeof(ELEMENT_TYPE));
7016
if (! newE)
7017
return 0;
7018
if (oldE->nDefaultAtts) {
7019
newE->defaultAtts
7020
= ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7021
if (! newE->defaultAtts) {
7022
return 0;
7023
}
7024
}
7025
if (oldE->idAtt)
7026
newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7027
oldE->idAtt->name, 0);
7028
newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7029
if (oldE->prefix)
7030
newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7031
oldE->prefix->name, 0);
7032
for (i = 0; i < newE->nDefaultAtts; i++) {
7033
newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7034
oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7035
newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7036
if (oldE->defaultAtts[i].value) {
7037
newE->defaultAtts[i].value
7038
= poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7039
if (! newE->defaultAtts[i].value)
7040
return 0;
7041
} else
7042
newE->defaultAtts[i].value = NULL;
7043
}
7044
}
7045
7046
/* Copy the entity tables. */
7047
if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7048
&(oldDtd->generalEntities)))
7049
return 0;
7050
7051
#ifdef XML_DTD
7052
if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7053
&(oldDtd->paramEntities)))
7054
return 0;
7055
newDtd->paramEntityRead = oldDtd->paramEntityRead;
7056
#endif /* XML_DTD */
7057
7058
newDtd->keepProcessing = oldDtd->keepProcessing;
7059
newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7060
newDtd->standalone = oldDtd->standalone;
7061
7062
/* Don't want deep copying for scaffolding */
7063
newDtd->in_eldecl = oldDtd->in_eldecl;
7064
newDtd->scaffold = oldDtd->scaffold;
7065
newDtd->contentStringLen = oldDtd->contentStringLen;
7066
newDtd->scaffSize = oldDtd->scaffSize;
7067
newDtd->scaffLevel = oldDtd->scaffLevel;
7068
newDtd->scaffIndex = oldDtd->scaffIndex;
7069
7070
return 1;
7071
} /* End dtdCopy */
7072
7073
static int
7074
copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7075
STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7076
HASH_TABLE_ITER iter;
7077
const XML_Char *cachedOldBase = NULL;
7078
const XML_Char *cachedNewBase = NULL;
7079
7080
hashTableIterInit(&iter, oldTable);
7081
7082
for (;;) {
7083
ENTITY *newE;
7084
const XML_Char *name;
7085
const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7086
if (! oldE)
7087
break;
7088
name = poolCopyString(newPool, oldE->name);
7089
if (! name)
7090
return 0;
7091
newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7092
if (! newE)
7093
return 0;
7094
if (oldE->systemId) {
7095
const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7096
if (! tem)
7097
return 0;
7098
newE->systemId = tem;
7099
if (oldE->base) {
7100
if (oldE->base == cachedOldBase)
7101
newE->base = cachedNewBase;
7102
else {
7103
cachedOldBase = oldE->base;
7104
tem = poolCopyString(newPool, cachedOldBase);
7105
if (! tem)
7106
return 0;
7107
cachedNewBase = newE->base = tem;
7108
}
7109
}
7110
if (oldE->publicId) {
7111
tem = poolCopyString(newPool, oldE->publicId);
7112
if (! tem)
7113
return 0;
7114
newE->publicId = tem;
7115
}
7116
} else {
7117
const XML_Char *tem
7118
= poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7119
if (! tem)
7120
return 0;
7121
newE->textPtr = tem;
7122
newE->textLen = oldE->textLen;
7123
}
7124
if (oldE->notation) {
7125
const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7126
if (! tem)
7127
return 0;
7128
newE->notation = tem;
7129
}
7130
newE->is_param = oldE->is_param;
7131
newE->is_internal = oldE->is_internal;
7132
}
7133
return 1;
7134
}
7135
7136
#define INIT_POWER 6
7137
7138
static XML_Bool FASTCALL
7139
keyeq(KEY s1, KEY s2) {
7140
for (; *s1 == *s2; s1++, s2++)
7141
if (*s1 == 0)
7142
return XML_TRUE;
7143
return XML_FALSE;
7144
}
7145
7146
static size_t
7147
keylen(KEY s) {
7148
size_t len = 0;
7149
for (; *s; s++, len++)
7150
;
7151
return len;
7152
}
7153
7154
static void
7155
copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7156
key->k[0] = 0;
7157
key->k[1] = get_hash_secret_salt(parser);
7158
}
7159
7160
static unsigned long FASTCALL
7161
hash(XML_Parser parser, KEY s) {
7162
struct siphash state;
7163
struct sipkey key;
7164
(void)sip24_valid;
7165
copy_salt_to_sipkey(parser, &key);
7166
sip24_init(&state, &key);
7167
sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7168
return (unsigned long)sip24_final(&state);
7169
}
7170
7171
static NAMED *
7172
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7173
size_t i;
7174
if (table->size == 0) {
7175
size_t tsize;
7176
if (! createSize)
7177
return NULL;
7178
table->power = INIT_POWER;
7179
/* table->size is a power of 2 */
7180
table->size = (size_t)1 << INIT_POWER;
7181
tsize = table->size * sizeof(NAMED *);
7182
table->v = table->mem->malloc_fcn(tsize);
7183
if (! table->v) {
7184
table->size = 0;
7185
return NULL;
7186
}
7187
memset(table->v, 0, tsize);
7188
i = hash(parser, name) & ((unsigned long)table->size - 1);
7189
} else {
7190
unsigned long h = hash(parser, name);
7191
unsigned long mask = (unsigned long)table->size - 1;
7192
unsigned char step = 0;
7193
i = h & mask;
7194
while (table->v[i]) {
7195
if (keyeq(name, table->v[i]->name))
7196
return table->v[i];
7197
if (! step)
7198
step = PROBE_STEP(h, mask, table->power);
7199
i < step ? (i += table->size - step) : (i -= step);
7200
}
7201
if (! createSize)
7202
return NULL;
7203
7204
/* check for overflow (table is half full) */
7205
if (table->used >> (table->power - 1)) {
7206
unsigned char newPower = table->power + 1;
7207
7208
/* Detect and prevent invalid shift */
7209
if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7210
return NULL;
7211
}
7212
7213
size_t newSize = (size_t)1 << newPower;
7214
unsigned long newMask = (unsigned long)newSize - 1;
7215
7216
/* Detect and prevent integer overflow */
7217
if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7218
return NULL;
7219
}
7220
7221
size_t tsize = newSize * sizeof(NAMED *);
7222
NAMED **newV = table->mem->malloc_fcn(tsize);
7223
if (! newV)
7224
return NULL;
7225
memset(newV, 0, tsize);
7226
for (i = 0; i < table->size; i++)
7227
if (table->v[i]) {
7228
unsigned long newHash = hash(parser, table->v[i]->name);
7229
size_t j = newHash & newMask;
7230
step = 0;
7231
while (newV[j]) {
7232
if (! step)
7233
step = PROBE_STEP(newHash, newMask, newPower);
7234
j < step ? (j += newSize - step) : (j -= step);
7235
}
7236
newV[j] = table->v[i];
7237
}
7238
table->mem->free_fcn(table->v);
7239
table->v = newV;
7240
table->power = newPower;
7241
table->size = newSize;
7242
i = h & newMask;
7243
step = 0;
7244
while (table->v[i]) {
7245
if (! step)
7246
step = PROBE_STEP(h, newMask, newPower);
7247
i < step ? (i += newSize - step) : (i -= step);
7248
}
7249
}
7250
}
7251
table->v[i] = table->mem->malloc_fcn(createSize);
7252
if (! table->v[i])
7253
return NULL;
7254
memset(table->v[i], 0, createSize);
7255
table->v[i]->name = name;
7256
(table->used)++;
7257
return table->v[i];
7258
}
7259
7260
static void FASTCALL
7261
hashTableClear(HASH_TABLE *table) {
7262
size_t i;
7263
for (i = 0; i < table->size; i++) {
7264
table->mem->free_fcn(table->v[i]);
7265
table->v[i] = NULL;
7266
}
7267
table->used = 0;
7268
}
7269
7270
static void FASTCALL
7271
hashTableDestroy(HASH_TABLE *table) {
7272
size_t i;
7273
for (i = 0; i < table->size; i++)
7274
table->mem->free_fcn(table->v[i]);
7275
table->mem->free_fcn(table->v);
7276
}
7277
7278
static void FASTCALL
7279
hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7280
p->power = 0;
7281
p->size = 0;
7282
p->used = 0;
7283
p->v = NULL;
7284
p->mem = ms;
7285
}
7286
7287
static void FASTCALL
7288
hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7289
iter->p = table->v;
7290
iter->end = iter->p ? iter->p + table->size : NULL;
7291
}
7292
7293
static NAMED *FASTCALL
7294
hashTableIterNext(HASH_TABLE_ITER *iter) {
7295
while (iter->p != iter->end) {
7296
NAMED *tem = *(iter->p)++;
7297
if (tem)
7298
return tem;
7299
}
7300
return NULL;
7301
}
7302
7303
static void FASTCALL
7304
poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7305
pool->blocks = NULL;
7306
pool->freeBlocks = NULL;
7307
pool->start = NULL;
7308
pool->ptr = NULL;
7309
pool->end = NULL;
7310
pool->mem = ms;
7311
}
7312
7313
static void FASTCALL
7314
poolClear(STRING_POOL *pool) {
7315
if (! pool->freeBlocks)
7316
pool->freeBlocks = pool->blocks;
7317
else {
7318
BLOCK *p = pool->blocks;
7319
while (p) {
7320
BLOCK *tem = p->next;
7321
p->next = pool->freeBlocks;
7322
pool->freeBlocks = p;
7323
p = tem;
7324
}
7325
}
7326
pool->blocks = NULL;
7327
pool->start = NULL;
7328
pool->ptr = NULL;
7329
pool->end = NULL;
7330
}
7331
7332
static void FASTCALL
7333
poolDestroy(STRING_POOL *pool) {
7334
BLOCK *p = pool->blocks;
7335
while (p) {
7336
BLOCK *tem = p->next;
7337
pool->mem->free_fcn(p);
7338
p = tem;
7339
}
7340
p = pool->freeBlocks;
7341
while (p) {
7342
BLOCK *tem = p->next;
7343
pool->mem->free_fcn(p);
7344
p = tem;
7345
}
7346
}
7347
7348
static XML_Char *
7349
poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7350
const char *end) {
7351
if (! pool->ptr && ! poolGrow(pool))
7352
return NULL;
7353
for (;;) {
7354
const enum XML_Convert_Result convert_res = XmlConvert(
7355
enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7356
if ((convert_res == XML_CONVERT_COMPLETED)
7357
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7358
break;
7359
if (! poolGrow(pool))
7360
return NULL;
7361
}
7362
return pool->start;
7363
}
7364
7365
static const XML_Char *FASTCALL
7366
poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7367
do {
7368
if (! poolAppendChar(pool, *s))
7369
return NULL;
7370
} while (*s++);
7371
s = pool->start;
7372
poolFinish(pool);
7373
return s;
7374
}
7375
7376
static const XML_Char *
7377
poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7378
if (! pool->ptr && ! poolGrow(pool)) {
7379
/* The following line is unreachable given the current usage of
7380
* poolCopyStringN(). Currently it is called from exactly one
7381
* place to copy the text of a simple general entity. By that
7382
* point, the name of the entity is already stored in the pool, so
7383
* pool->ptr cannot be NULL.
7384
*
7385
* If poolCopyStringN() is used elsewhere as it well might be,
7386
* this line may well become executable again. Regardless, this
7387
* sort of check shouldn't be removed lightly, so we just exclude
7388
* it from the coverage statistics.
7389
*/
7390
return NULL; /* LCOV_EXCL_LINE */
7391
}
7392
for (; n > 0; --n, s++) {
7393
if (! poolAppendChar(pool, *s))
7394
return NULL;
7395
}
7396
s = pool->start;
7397
poolFinish(pool);
7398
return s;
7399
}
7400
7401
static const XML_Char *FASTCALL
7402
poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7403
while (*s) {
7404
if (! poolAppendChar(pool, *s))
7405
return NULL;
7406
s++;
7407
}
7408
return pool->start;
7409
}
7410
7411
static XML_Char *
7412
poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7413
const char *end) {
7414
if (! poolAppend(pool, enc, ptr, end))
7415
return NULL;
7416
if (pool->ptr == pool->end && ! poolGrow(pool))
7417
return NULL;
7418
*(pool->ptr)++ = 0;
7419
return pool->start;
7420
}
7421
7422
static size_t
7423
poolBytesToAllocateFor(int blockSize) {
7424
/* Unprotected math would be:
7425
** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7426
**
7427
** Detect overflow, avoiding _signed_ overflow undefined behavior
7428
** For a + b * c we check b * c in isolation first, so that addition of a
7429
** on top has no chance of making us accept a small non-negative number
7430
*/
7431
const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7432
7433
if (blockSize <= 0)
7434
return 0;
7435
7436
if (blockSize > (int)(INT_MAX / stretch))
7437
return 0;
7438
7439
{
7440
const int stretchedBlockSize = blockSize * (int)stretch;
7441
const int bytesToAllocate
7442
= (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7443
if (bytesToAllocate < 0)
7444
return 0;
7445
7446
return (size_t)bytesToAllocate;
7447
}
7448
}
7449
7450
static XML_Bool FASTCALL
7451
poolGrow(STRING_POOL *pool) {
7452
if (pool->freeBlocks) {
7453
if (pool->start == 0) {
7454
pool->blocks = pool->freeBlocks;
7455
pool->freeBlocks = pool->freeBlocks->next;
7456
pool->blocks->next = NULL;
7457
pool->start = pool->blocks->s;
7458
pool->end = pool->start + pool->blocks->size;
7459
pool->ptr = pool->start;
7460
return XML_TRUE;
7461
}
7462
if (pool->end - pool->start < pool->freeBlocks->size) {
7463
BLOCK *tem = pool->freeBlocks->next;
7464
pool->freeBlocks->next = pool->blocks;
7465
pool->blocks = pool->freeBlocks;
7466
pool->freeBlocks = tem;
7467
memcpy(pool->blocks->s, pool->start,
7468
(pool->end - pool->start) * sizeof(XML_Char));
7469
pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7470
pool->start = pool->blocks->s;
7471
pool->end = pool->start + pool->blocks->size;
7472
return XML_TRUE;
7473
}
7474
}
7475
if (pool->blocks && pool->start == pool->blocks->s) {
7476
BLOCK *temp;
7477
int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7478
size_t bytesToAllocate;
7479
7480
/* NOTE: Needs to be calculated prior to calling `realloc`
7481
to avoid dangling pointers: */
7482
const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7483
7484
if (blockSize < 0) {
7485
/* This condition traps a situation where either more than
7486
* INT_MAX/2 bytes have already been allocated. This isn't
7487
* readily testable, since it is unlikely that an average
7488
* machine will have that much memory, so we exclude it from the
7489
* coverage statistics.
7490
*/
7491
return XML_FALSE; /* LCOV_EXCL_LINE */
7492
}
7493
7494
bytesToAllocate = poolBytesToAllocateFor(blockSize);
7495
if (bytesToAllocate == 0)
7496
return XML_FALSE;
7497
7498
temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7499
(unsigned)bytesToAllocate);
7500
if (temp == NULL)
7501
return XML_FALSE;
7502
pool->blocks = temp;
7503
pool->blocks->size = blockSize;
7504
pool->ptr = pool->blocks->s + offsetInsideBlock;
7505
pool->start = pool->blocks->s;
7506
pool->end = pool->start + blockSize;
7507
} else {
7508
BLOCK *tem;
7509
int blockSize = (int)(pool->end - pool->start);
7510
size_t bytesToAllocate;
7511
7512
if (blockSize < 0) {
7513
/* This condition traps a situation where either more than
7514
* INT_MAX bytes have already been allocated (which is prevented
7515
* by various pieces of program logic, not least this one, never
7516
* mind the unlikelihood of actually having that much memory) or
7517
* the pool control fields have been corrupted (which could
7518
* conceivably happen in an extremely buggy user handler
7519
* function). Either way it isn't readily testable, so we
7520
* exclude it from the coverage statistics.
7521
*/
7522
return XML_FALSE; /* LCOV_EXCL_LINE */
7523
}
7524
7525
if (blockSize < INIT_BLOCK_SIZE)
7526
blockSize = INIT_BLOCK_SIZE;
7527
else {
7528
/* Detect overflow, avoiding _signed_ overflow undefined behavior */
7529
if ((int)((unsigned)blockSize * 2U) < 0) {
7530
return XML_FALSE;
7531
}
7532
blockSize *= 2;
7533
}
7534
7535
bytesToAllocate = poolBytesToAllocateFor(blockSize);
7536
if (bytesToAllocate == 0)
7537
return XML_FALSE;
7538
7539
tem = pool->mem->malloc_fcn(bytesToAllocate);
7540
if (! tem)
7541
return XML_FALSE;
7542
tem->size = blockSize;
7543
tem->next = pool->blocks;
7544
pool->blocks = tem;
7545
if (pool->ptr != pool->start)
7546
memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7547
pool->ptr = tem->s + (pool->ptr - pool->start);
7548
pool->start = tem->s;
7549
pool->end = tem->s + blockSize;
7550
}
7551
return XML_TRUE;
7552
}
7553
7554
static int FASTCALL
7555
nextScaffoldPart(XML_Parser parser) {
7556
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7557
CONTENT_SCAFFOLD *me;
7558
int next;
7559
7560
if (! dtd->scaffIndex) {
7561
dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7562
if (! dtd->scaffIndex)
7563
return -1;
7564
dtd->scaffIndex[0] = 0;
7565
}
7566
7567
if (dtd->scaffCount >= dtd->scaffSize) {
7568
CONTENT_SCAFFOLD *temp;
7569
if (dtd->scaffold) {
7570
/* Detect and prevent integer overflow */
7571
if (dtd->scaffSize > UINT_MAX / 2u) {
7572
return -1;
7573
}
7574
/* Detect and prevent integer overflow.
7575
* The preprocessor guard addresses the "always false" warning
7576
* from -Wtype-limits on platforms where
7577
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7578
#if UINT_MAX >= SIZE_MAX
7579
if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7580
return -1;
7581
}
7582
#endif
7583
7584
temp = (CONTENT_SCAFFOLD *)REALLOC(
7585
parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7586
if (temp == NULL)
7587
return -1;
7588
dtd->scaffSize *= 2;
7589
} else {
7590
temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7591
* sizeof(CONTENT_SCAFFOLD));
7592
if (temp == NULL)
7593
return -1;
7594
dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7595
}
7596
dtd->scaffold = temp;
7597
}
7598
next = dtd->scaffCount++;
7599
me = &dtd->scaffold[next];
7600
if (dtd->scaffLevel) {
7601
CONTENT_SCAFFOLD *parent
7602
= &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7603
if (parent->lastchild) {
7604
dtd->scaffold[parent->lastchild].nextsib = next;
7605
}
7606
if (! parent->childcnt)
7607
parent->firstchild = next;
7608
parent->lastchild = next;
7609
parent->childcnt++;
7610
}
7611
me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7612
return next;
7613
}
7614
7615
static XML_Content *
7616
build_model(XML_Parser parser) {
7617
/* Function build_model transforms the existing parser->m_dtd->scaffold
7618
* array of CONTENT_SCAFFOLD tree nodes into a new array of
7619
* XML_Content tree nodes followed by a gapless list of zero-terminated
7620
* strings. */
7621
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7622
XML_Content *ret;
7623
XML_Char *str; /* the current string writing location */
7624
7625
/* Detect and prevent integer overflow.
7626
* The preprocessor guard addresses the "always false" warning
7627
* from -Wtype-limits on platforms where
7628
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7629
#if UINT_MAX >= SIZE_MAX
7630
if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7631
return NULL;
7632
}
7633
if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7634
return NULL;
7635
}
7636
#endif
7637
if (dtd->scaffCount * sizeof(XML_Content)
7638
> (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7639
return NULL;
7640
}
7641
7642
const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7643
+ (dtd->contentStringLen * sizeof(XML_Char)));
7644
7645
ret = (XML_Content *)MALLOC(parser, allocsize);
7646
if (! ret)
7647
return NULL;
7648
7649
/* What follows is an iterative implementation (of what was previously done
7650
* recursively in a dedicated function called "build_node". The old recursive
7651
* build_node could be forced into stack exhaustion from input as small as a
7652
* few megabyte, and so that was a security issue. Hence, a function call
7653
* stack is avoided now by resolving recursion.)
7654
*
7655
* The iterative approach works as follows:
7656
*
7657
* - We have two writing pointers, both walking up the result array; one does
7658
* the work, the other creates "jobs" for its colleague to do, and leads
7659
* the way:
7660
*
7661
* - The faster one, pointer jobDest, always leads and writes "what job
7662
* to do" by the other, once they reach that place in the
7663
* array: leader "jobDest" stores the source node array index (relative
7664
* to array dtd->scaffold) in field "numchildren".
7665
*
7666
* - The slower one, pointer dest, looks at the value stored in the
7667
* "numchildren" field (which actually holds a source node array index
7668
* at that time) and puts the real data from dtd->scaffold in.
7669
*
7670
* - Before the loop starts, jobDest writes source array index 0
7671
* (where the root node is located) so that dest will have something to do
7672
* when it starts operation.
7673
*
7674
* - Whenever nodes with children are encountered, jobDest appends
7675
* them as new jobs, in order. As a result, tree node siblings are
7676
* adjacent in the resulting array, for example:
7677
*
7678
* [0] root, has two children
7679
* [1] first child of 0, has three children
7680
* [3] first child of 1, does not have children
7681
* [4] second child of 1, does not have children
7682
* [5] third child of 1, does not have children
7683
* [2] second child of 0, does not have children
7684
*
7685
* Or (the same data) presented in flat array view:
7686
*
7687
* [0] root, has two children
7688
*
7689
* [1] first child of 0, has three children
7690
* [2] second child of 0, does not have children
7691
*
7692
* [3] first child of 1, does not have children
7693
* [4] second child of 1, does not have children
7694
* [5] third child of 1, does not have children
7695
*
7696
* - The algorithm repeats until all target array indices have been processed.
7697
*/
7698
XML_Content *dest = ret; /* tree node writing location, moves upwards */
7699
XML_Content *const destLimit = &ret[dtd->scaffCount];
7700
XML_Content *jobDest = ret; /* next free writing location in target array */
7701
str = (XML_Char *)&ret[dtd->scaffCount];
7702
7703
/* Add the starting job, the root node (index 0) of the source tree */
7704
(jobDest++)->numchildren = 0;
7705
7706
for (; dest < destLimit; dest++) {
7707
/* Retrieve source tree array index from job storage */
7708
const int src_node = (int)dest->numchildren;
7709
7710
/* Convert item */
7711
dest->type = dtd->scaffold[src_node].type;
7712
dest->quant = dtd->scaffold[src_node].quant;
7713
if (dest->type == XML_CTYPE_NAME) {
7714
const XML_Char *src;
7715
dest->name = str;
7716
src = dtd->scaffold[src_node].name;
7717
for (;;) {
7718
*str++ = *src;
7719
if (! *src)
7720
break;
7721
src++;
7722
}
7723
dest->numchildren = 0;
7724
dest->children = NULL;
7725
} else {
7726
unsigned int i;
7727
int cn;
7728
dest->name = NULL;
7729
dest->numchildren = dtd->scaffold[src_node].childcnt;
7730
dest->children = jobDest;
7731
7732
/* Append scaffold indices of children to array */
7733
for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7734
i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7735
(jobDest++)->numchildren = (unsigned int)cn;
7736
}
7737
}
7738
7739
return ret;
7740
}
7741
7742
static ELEMENT_TYPE *
7743
getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7744
const char *end) {
7745
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7746
const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7747
ELEMENT_TYPE *ret;
7748
7749
if (! name)
7750
return NULL;
7751
ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7752
sizeof(ELEMENT_TYPE));
7753
if (! ret)
7754
return NULL;
7755
if (ret->name != name)
7756
poolDiscard(&dtd->pool);
7757
else {
7758
poolFinish(&dtd->pool);
7759
if (! setElementTypePrefix(parser, ret))
7760
return NULL;
7761
}
7762
return ret;
7763
}
7764
7765
static XML_Char *
7766
copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7767
size_t charsRequired = 0;
7768
XML_Char *result;
7769
7770
/* First determine how long the string is */
7771
while (s[charsRequired] != 0) {
7772
charsRequired++;
7773
}
7774
/* Include the terminator */
7775
charsRequired++;
7776
7777
/* Now allocate space for the copy */
7778
result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7779
if (result == NULL)
7780
return NULL;
7781
/* Copy the original into place */
7782
memcpy(result, s, charsRequired * sizeof(XML_Char));
7783
return result;
7784
}
7785
7786
#if XML_GE == 1
7787
7788
static float
7789
accountingGetCurrentAmplification(XML_Parser rootParser) {
7790
// 1.........1.........12 => 22
7791
const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7792
const XmlBigCount countBytesOutput
7793
= rootParser->m_accounting.countBytesDirect
7794
+ rootParser->m_accounting.countBytesIndirect;
7795
const float amplificationFactor
7796
= rootParser->m_accounting.countBytesDirect
7797
? (countBytesOutput
7798
/ (float)(rootParser->m_accounting.countBytesDirect))
7799
: ((lenOfShortestInclude
7800
+ rootParser->m_accounting.countBytesIndirect)
7801
/ (float)lenOfShortestInclude);
7802
assert(! rootParser->m_parentParser);
7803
return amplificationFactor;
7804
}
7805
7806
static void
7807
accountingReportStats(XML_Parser originParser, const char *epilog) {
7808
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7809
assert(! rootParser->m_parentParser);
7810
7811
if (rootParser->m_accounting.debugLevel == 0u) {
7812
return;
7813
}
7814
7815
const float amplificationFactor
7816
= accountingGetCurrentAmplification(rootParser);
7817
fprintf(stderr,
7818
"expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7819
"10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7820
(void *)rootParser, rootParser->m_accounting.countBytesDirect,
7821
rootParser->m_accounting.countBytesIndirect,
7822
(double)amplificationFactor, epilog);
7823
}
7824
7825
static void
7826
accountingOnAbort(XML_Parser originParser) {
7827
accountingReportStats(originParser, " ABORTING\n");
7828
}
7829
7830
static void
7831
accountingReportDiff(XML_Parser rootParser,
7832
unsigned int levelsAwayFromRootParser, const char *before,
7833
const char *after, ptrdiff_t bytesMore, int source_line,
7834
enum XML_Account account) {
7835
assert(! rootParser->m_parentParser);
7836
7837
fprintf(stderr,
7838
" (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7839
bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7840
levelsAwayFromRootParser, source_line, 10, "");
7841
7842
const char ellipis[] = "[..]";
7843
const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7844
const unsigned int contextLength = 10;
7845
7846
/* Note: Performance is of no concern here */
7847
const char *walker = before;
7848
if ((rootParser->m_accounting.debugLevel >= 3u)
7849
|| (after - before)
7850
<= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7851
for (; walker < after; walker++) {
7852
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7853
}
7854
} else {
7855
for (; walker < before + contextLength; walker++) {
7856
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7857
}
7858
fprintf(stderr, ellipis);
7859
walker = after - contextLength;
7860
for (; walker < after; walker++) {
7861
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7862
}
7863
}
7864
fprintf(stderr, "\"\n");
7865
}
7866
7867
static XML_Bool
7868
accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7869
const char *after, int source_line,
7870
enum XML_Account account) {
7871
/* Note: We need to check the token type *first* to be sure that
7872
* we can even access variable <after>, safely.
7873
* E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7874
switch (tok) {
7875
case XML_TOK_INVALID:
7876
case XML_TOK_PARTIAL:
7877
case XML_TOK_PARTIAL_CHAR:
7878
case XML_TOK_NONE:
7879
return XML_TRUE;
7880
}
7881
7882
if (account == XML_ACCOUNT_NONE)
7883
return XML_TRUE; /* because these bytes have been accounted for, already */
7884
7885
unsigned int levelsAwayFromRootParser;
7886
const XML_Parser rootParser
7887
= getRootParserOf(originParser, &levelsAwayFromRootParser);
7888
assert(! rootParser->m_parentParser);
7889
7890
const int isDirect
7891
= (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7892
const ptrdiff_t bytesMore = after - before;
7893
7894
XmlBigCount *const additionTarget
7895
= isDirect ? &rootParser->m_accounting.countBytesDirect
7896
: &rootParser->m_accounting.countBytesIndirect;
7897
7898
/* Detect and avoid integer overflow */
7899
if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7900
return XML_FALSE;
7901
*additionTarget += bytesMore;
7902
7903
const XmlBigCount countBytesOutput
7904
= rootParser->m_accounting.countBytesDirect
7905
+ rootParser->m_accounting.countBytesIndirect;
7906
const float amplificationFactor
7907
= accountingGetCurrentAmplification(rootParser);
7908
const XML_Bool tolerated
7909
= (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7910
|| (amplificationFactor
7911
<= rootParser->m_accounting.maximumAmplificationFactor);
7912
7913
if (rootParser->m_accounting.debugLevel >= 2u) {
7914
accountingReportStats(rootParser, "");
7915
accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7916
bytesMore, source_line, account);
7917
}
7918
7919
return tolerated;
7920
}
7921
7922
unsigned long long
7923
testingAccountingGetCountBytesDirect(XML_Parser parser) {
7924
if (! parser)
7925
return 0;
7926
return parser->m_accounting.countBytesDirect;
7927
}
7928
7929
unsigned long long
7930
testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7931
if (! parser)
7932
return 0;
7933
return parser->m_accounting.countBytesIndirect;
7934
}
7935
7936
static void
7937
entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7938
const char *action, int sourceLine) {
7939
assert(! rootParser->m_parentParser);
7940
if (rootParser->m_entity_stats.debugLevel == 0u)
7941
return;
7942
7943
# if defined(XML_UNICODE)
7944
const char *const entityName = "[..]";
7945
# else
7946
const char *const entityName = entity->name;
7947
# endif
7948
7949
fprintf(
7950
stderr,
7951
"expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7952
(void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7953
rootParser->m_entity_stats.currentDepth,
7954
rootParser->m_entity_stats.maximumDepthSeen,
7955
(rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7956
entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7957
sourceLine);
7958
}
7959
7960
static void
7961
entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7962
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7963
assert(! rootParser->m_parentParser);
7964
7965
rootParser->m_entity_stats.countEverOpened++;
7966
rootParser->m_entity_stats.currentDepth++;
7967
if (rootParser->m_entity_stats.currentDepth
7968
> rootParser->m_entity_stats.maximumDepthSeen) {
7969
rootParser->m_entity_stats.maximumDepthSeen++;
7970
}
7971
7972
entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7973
}
7974
7975
static void
7976
entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7977
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7978
assert(! rootParser->m_parentParser);
7979
7980
entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7981
rootParser->m_entity_stats.currentDepth--;
7982
}
7983
7984
static XML_Parser
7985
getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7986
XML_Parser rootParser = parser;
7987
unsigned int stepsTakenUpwards = 0;
7988
while (rootParser->m_parentParser) {
7989
rootParser = rootParser->m_parentParser;
7990
stepsTakenUpwards++;
7991
}
7992
assert(! rootParser->m_parentParser);
7993
if (outLevelDiff != NULL) {
7994
*outLevelDiff = stepsTakenUpwards;
7995
}
7996
return rootParser;
7997
}
7998
7999
/* Map a byte value to a constant, zero-terminated, printable string.
 *
 *  - 0 maps to "\0", 9 to "\t", 10 to "\n" and 13 to "\r" (each as a
 *    backslash followed by the letter or digit),
 *  - '"' and '\' map to themselves preceded by a backslash,
 *  - all other values from 32 to 126 map to the character itself,
 *  - every remaining value maps to "\x" followed by its upper-case
 *    hexadecimal value without leading zeros (e.g. "\x1", "\x7F", "\xFF").
 *
 * The returned string has static storage duration and must not be
 * modified or freed.
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[256] = {
      /* 0x00 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /* 0x04 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /* 0x08 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /* 0x0C */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /* 0x10 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /* 0x14 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /* 0x18 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /* 0x1C */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */ " ",     "!",     "\\\"",  "#",
      /* 0x24 */ "$",     "%",     "&",     "'",
      /* 0x28 */ "(",     ")",     "*",     "+",
      /* 0x2C */ ",",     "-",     ".",     "/",
      /* 0x30 */ "0",     "1",     "2",     "3",
      /* 0x34 */ "4",     "5",     "6",     "7",
      /* 0x38 */ "8",     "9",     ":",     ";",
      /* 0x3C */ "<",     "=",     ">",     "?",
      /* 0x40 */ "@",     "A",     "B",     "C",
      /* 0x44 */ "D",     "E",     "F",     "G",
      /* 0x48 */ "H",     "I",     "J",     "K",
      /* 0x4C */ "L",     "M",     "N",     "O",
      /* 0x50 */ "P",     "Q",     "R",     "S",
      /* 0x54 */ "T",     "U",     "V",     "W",
      /* 0x58 */ "X",     "Y",     "Z",     "[",
      /* 0x5C */ "\\\\",  "]",     "^",     "_",
      /* 0x60 */ "`",     "a",     "b",     "c",
      /* 0x64 */ "d",     "e",     "f",     "g",
      /* 0x68 */ "h",     "i",     "j",     "k",
      /* 0x6C */ "l",     "m",     "n",     "o",
      /* 0x70 */ "p",     "q",     "r",     "s",
      /* 0x74 */ "t",     "u",     "v",     "w",
      /* 0x78 */ "x",     "y",     "z",     "{",
      /* 0x7C */ "|",     "}",     "~",     "\\x7F",
      /* 0x80 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 0x84 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 0x88 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 0x8C */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 0x94 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 0x98 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 0x9C */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 0xA4 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 0xA8 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 0xAC */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 0xB4 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 0xB8 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 0xBC */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 0xC4 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 0xC8 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 0xCC */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 0xD4 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 0xD8 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 0xDC */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 0xE4 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 0xE8 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 0xEC */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 0xF4 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 0xF8 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 0xFC */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  /* Only compiled where unsigned char can exceed 255; keeps the lookup in
   * bounds on such platforms. */
#  if UCHAR_MAX > 255
  if (c > 255) {
    assert(0); /* never gets here */
    return "dead code";
  }
#  endif

  return printable[c];
}
8520
8521
#endif /* XML_GE == 1 */
8522
8523
/* Read a debug level from the environment variable `variableName`.
 *
 * Returns the variable's value parsed by strtoul in base 10.  Returns
 * `defaultDebugLevel` when the variable is not set, is empty, contains
 * trailing characters after the number, or when strtoul reports an error
 * through errno.
 *
 * Side effect: whenever the variable is set, errno is 0 on return.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  char *parseEnd = NULL;
  unsigned long parsed;
  int rejected;

  if (text == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul signals range errors only through errno, so clear it first. */
  errno = 0;
  parsed = strtoul(text, &parseEnd, 10);

  rejected = (errno != 0)            /* conversion error, e.g. out of range */
             || (parseEnd == text)   /* no digits consumed */
             || (parseEnd[0] != '\0'); /* trailing garbage */
  if (! rejected) {
    return parsed;
  }

  errno = 0;
  return defaultDebugLevel;
}
8541
8542