Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/expat/lib/xmlparse.c
39478 views
1
/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+)
2
__ __ _
3
___\ \/ /_ __ __ _| |_
4
/ _ \\ /| '_ \ / _` | __|
5
| __// \| |_) | (_| | |_
6
\___/_/\_\ .__/ \__,_|\__|
7
|_| XML parser
8
9
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
Copyright (c) 2000 Clark Cooper <[email protected]>
11
Copyright (c) 2000-2006 Fred L. Drake, Jr. <[email protected]>
12
Copyright (c) 2001-2002 Greg Stein <[email protected]>
13
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
14
Copyright (c) 2005-2009 Steven Solie <[email protected]>
15
Copyright (c) 2016 Eric Rahm <[email protected]>
16
Copyright (c) 2016-2025 Sebastian Pipping <[email protected]>
17
Copyright (c) 2016 Gaurav <[email protected]>
18
Copyright (c) 2016 Thomas Beutlich <[email protected]>
19
Copyright (c) 2016 Gustavo Grieco <[email protected]>
20
Copyright (c) 2016 Pascal Cuoq <[email protected]>
21
Copyright (c) 2016 Ed Schouten <[email protected]>
22
Copyright (c) 2017-2022 Rhodri James <[email protected]>
23
Copyright (c) 2017 Václav Slavík <[email protected]>
24
Copyright (c) 2017 Viktor Szakats <[email protected]>
25
Copyright (c) 2017 Chanho Park <[email protected]>
26
Copyright (c) 2017 Rolf Eike Beer <[email protected]>
27
Copyright (c) 2017 Hans Wennborg <[email protected]>
28
Copyright (c) 2018 Anton Maklakov <[email protected]>
29
Copyright (c) 2018 Benjamin Peterson <[email protected]>
30
Copyright (c) 2018 Marco Maggi <[email protected]>
31
Copyright (c) 2018 Mariusz Zaborski <[email protected]>
32
Copyright (c) 2019 David Loffredo <[email protected]>
33
Copyright (c) 2019-2020 Ben Wagner <[email protected]>
34
Copyright (c) 2019 Vadim Zeitlin <[email protected]>
35
Copyright (c) 2021 Donghee Na <[email protected]>
36
Copyright (c) 2022 Samanta Navarro <[email protected]>
37
Copyright (c) 2022 Jeffrey Walton <[email protected]>
38
Copyright (c) 2022 Jann Horn <[email protected]>
39
Copyright (c) 2022 Sean McBride <[email protected]>
40
Copyright (c) 2023 Owain Davies <[email protected]>
41
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
42
Copyright (c) 2024-2025 Berkay Eren Ürün <[email protected]>
43
Copyright (c) 2024 Hanno Böck <[email protected]>
44
Copyright (c) 2025 Matthew Fernandez <[email protected]>
45
Licensed under the MIT license:
46
47
Permission is hereby granted, free of charge, to any person obtaining
48
a copy of this software and associated documentation files (the
49
"Software"), to deal in the Software without restriction, including
50
without limitation the rights to use, copy, modify, merge, publish,
51
distribute, sublicense, and/or sell copies of the Software, and to permit
52
persons to whom the Software is furnished to do so, subject to the
53
following conditions:
54
55
The above copyright notice and this permission notice shall be included
56
in all copies or substantial portions of the Software.
57
58
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
59
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
61
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
62
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
63
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
64
USE OR OTHER DEALINGS IN THE SOFTWARE.
65
*/
66
67
#define XML_BUILDING_EXPAT 1
68
69
#include "expat_config.h"
70
71
#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
72
# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
73
#endif
74
75
#if defined(XML_DTD) && XML_GE == 0
76
# error Either undefine XML_DTD or define XML_GE to 1.
77
#endif
78
79
#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
80
|| (XML_CONTEXT_BYTES + 0 < 0)
81
# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
82
#endif
83
84
#if defined(HAVE_SYSCALL_GETRANDOM)
85
# if ! defined(_GNU_SOURCE)
86
# define _GNU_SOURCE 1 /* syscall prototype */
87
# endif
88
#endif
89
90
#ifdef _WIN32
91
/* force stdlib to define rand_s() */
92
# if ! defined(_CRT_RAND_S)
93
# define _CRT_RAND_S
94
# endif
95
#endif
96
97
#include <stdbool.h>
98
#include <stddef.h>
99
#include <string.h> /* memset(), memcpy() */
100
#include <assert.h>
101
#include <limits.h> /* INT_MAX, UINT_MAX */
102
#include <stdio.h> /* fprintf */
103
#include <stdlib.h> /* getenv, rand_s */
104
#include <stdint.h> /* uintptr_t */
105
#include <math.h> /* isnan */
106
107
#ifdef _WIN32
108
# define getpid GetCurrentProcessId
109
#else
110
# include <sys/time.h> /* gettimeofday() */
111
# include <sys/types.h> /* getpid() */
112
# include <unistd.h> /* getpid() */
113
# include <fcntl.h> /* O_RDONLY */
114
# include <errno.h>
115
#endif
116
117
#ifdef _WIN32
118
# include "winconfig.h"
119
#endif
120
121
#include "ascii.h"
122
#include "expat.h"
123
#include "siphash.h"
124
125
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
126
# if defined(HAVE_GETRANDOM)
127
# include <sys/random.h> /* getrandom */
128
# else
129
# include <unistd.h> /* syscall */
130
# include <sys/syscall.h> /* SYS_getrandom */
131
# endif
132
# if ! defined(GRND_NONBLOCK)
133
# define GRND_NONBLOCK 0x0001
134
# endif /* defined(GRND_NONBLOCK) */
135
#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
136
137
#if defined(HAVE_LIBBSD) \
138
&& (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
139
# include <bsd/stdlib.h>
140
#endif
141
142
#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
143
# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
144
#endif
145
146
#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
147
&& ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
148
&& ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
149
&& ! defined(XML_POOR_ENTROPY)
150
# error You do not have support for any sources of high quality entropy \
151
enabled. For end user security, that is probably not what you want. \
152
\
153
Your options include: \
154
* Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
155
* Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
156
* BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
157
* BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
158
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
159
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
160
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
161
* Windows >=Vista (rand_s): _WIN32. \
162
\
163
If insist on not using any of these, bypass this error by defining \
164
XML_POOR_ENTROPY; you have been warned. \
165
\
166
If you have reasons to patch this detection code away or need changes \
167
to the build system, please open a bug. Thank you!
168
#endif
169
170
#ifdef XML_UNICODE
171
# define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
172
# define XmlConvert XmlUtf16Convert
173
# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
174
# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
175
# define XmlEncode XmlUtf16Encode
176
# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
177
typedef unsigned short ICHAR;
178
#else
179
# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
180
# define XmlConvert XmlUtf8Convert
181
# define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
182
# define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
183
# define XmlEncode XmlUtf8Encode
184
# define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
185
typedef char ICHAR;
186
#endif
187
188
#ifndef XML_NS
189
190
# define XmlInitEncodingNS XmlInitEncoding
191
# define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
192
# undef XmlGetInternalEncodingNS
193
# define XmlGetInternalEncodingNS XmlGetInternalEncoding
194
# define XmlParseXmlDeclNS XmlParseXmlDecl
195
196
#endif
197
198
#ifdef XML_UNICODE
199
200
# ifdef XML_UNICODE_WCHAR_T
201
# define XML_T(x) (const wchar_t) x
202
# define XML_L(x) L##x
203
# else
204
# define XML_T(x) (const unsigned short)x
205
# define XML_L(x) x
206
# endif
207
208
#else
209
210
# define XML_T(x) x
211
# define XML_L(x) x
212
213
#endif
214
215
/* Round up n to be a multiple of sz, where sz is a power of 2. */
216
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
217
218
/* Do safe (NULL-aware) pointer arithmetic */
219
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
220
221
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
222
223
#include "internal.h"
224
#include "xmltok.h"
225
#include "xmlrole.h"
226
227
typedef const XML_Char *KEY;
228
229
typedef struct {
230
KEY name;
231
} NAMED;
232
233
typedef struct {
234
NAMED **v;
235
unsigned char power;
236
size_t size;
237
size_t used;
238
XML_Parser parser;
239
} HASH_TABLE;
240
241
static size_t keylen(KEY s);
242
243
static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
244
245
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
253
#define SECOND_HASH(hash, mask, power) \
254
((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
255
#define PROBE_STEP(hash, mask, power) \
256
((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
257
258
typedef struct {
259
NAMED **p;
260
NAMED **end;
261
} HASH_TABLE_ITER;
262
263
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
264
#define INIT_DATA_BUF_SIZE 1024
265
#define INIT_ATTS_SIZE 16
266
#define INIT_ATTS_VERSION 0xFFFFFFFF
267
#define INIT_BLOCK_SIZE 1024
268
#define INIT_BUFFER_SIZE 1024
269
270
#define EXPAND_SPARE 24
271
272
typedef struct binding {
273
struct prefix *prefix;
274
struct binding *nextTagBinding;
275
struct binding *prevPrefixBinding;
276
const struct attribute_id *attId;
277
XML_Char *uri;
278
int uriLen;
279
int uriAlloc;
280
} BINDING;
281
282
typedef struct prefix {
283
const XML_Char *name;
284
BINDING *binding;
285
} PREFIX;
286
287
typedef struct {
288
const XML_Char *str;
289
const XML_Char *localPart;
290
const XML_Char *prefix;
291
int strLen;
292
int uriLen;
293
int prefixLen;
294
} TAG_NAME;
295
296
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings. The memory buffer 'buf' is a separately-allocated
   memory area which stores the name. During the XML_Parse()/
   XML_ParseBuffer() call in which the element is open, the memory for
   the 'raw' version of the name (in the document encoding) is shared
   with the document buffer. If the element stays open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list.
*/
309
typedef struct tag {
310
struct tag *parent; /* parent of this element */
311
const char *rawName; /* tagName in the original encoding */
312
int rawNameLength;
313
TAG_NAME name; /* tagName in the API encoding */
314
char *buf; /* buffer for name components */
315
char *bufEnd; /* end of the buffer */
316
BINDING *bindings;
317
} TAG;
318
319
typedef struct {
320
const XML_Char *name;
321
const XML_Char *textPtr;
322
int textLen; /* length in XML_Chars */
323
int processed; /* # of processed bytes - when suspended */
324
const XML_Char *systemId;
325
const XML_Char *base;
326
const XML_Char *publicId;
327
const XML_Char *notation;
328
XML_Bool open;
329
XML_Bool hasMore; /* true if entity has not been completely processed */
330
/* An entity can be open while being already completely processed (hasMore ==
331
XML_FALSE). The reason is the delayed closing of entities until their inner
332
entities are processed and closed */
333
XML_Bool is_param;
334
XML_Bool is_internal; /* true if declared in internal subset outside PE */
335
} ENTITY;
336
337
typedef struct {
338
enum XML_Content_Type type;
339
enum XML_Content_Quant quant;
340
const XML_Char *name;
341
int firstchild;
342
int lastchild;
343
int childcnt;
344
int nextsib;
345
} CONTENT_SCAFFOLD;
346
347
#define INIT_SCAFFOLD_ELEMENTS 32
348
349
typedef struct block {
350
struct block *next;
351
int size;
352
XML_Char s[1];
353
} BLOCK;
354
355
typedef struct {
356
BLOCK *blocks;
357
BLOCK *freeBlocks;
358
const XML_Char *end;
359
XML_Char *ptr;
360
XML_Char *start;
361
XML_Parser parser;
362
} STRING_POOL;
363
364
/* The XML_Char before the name is used to determine whether
365
an attribute has been specified. */
366
typedef struct attribute_id {
367
XML_Char *name;
368
PREFIX *prefix;
369
XML_Bool maybeTokenized;
370
XML_Bool xmlns;
371
} ATTRIBUTE_ID;
372
373
typedef struct {
374
const ATTRIBUTE_ID *id;
375
XML_Bool isCdata;
376
const XML_Char *value;
377
} DEFAULT_ATTRIBUTE;
378
379
typedef struct {
380
unsigned long version;
381
unsigned long hash;
382
const XML_Char *uriName;
383
} NS_ATT;
384
385
typedef struct {
386
const XML_Char *name;
387
PREFIX *prefix;
388
const ATTRIBUTE_ID *idAtt;
389
int nDefaultAtts;
390
int allocDefaultAtts;
391
DEFAULT_ATTRIBUTE *defaultAtts;
392
} ELEMENT_TYPE;
393
394
typedef struct {
395
HASH_TABLE generalEntities;
396
HASH_TABLE elementTypes;
397
HASH_TABLE attributeIds;
398
HASH_TABLE prefixes;
399
STRING_POOL pool;
400
STRING_POOL entityValuePool;
401
/* false once a parameter entity reference has been skipped */
402
XML_Bool keepProcessing;
403
/* true once an internal or external PE reference has been encountered;
404
this includes the reference to an external subset */
405
XML_Bool hasParamEntityRefs;
406
XML_Bool standalone;
407
#ifdef XML_DTD
408
/* indicates if external PE has been read */
409
XML_Bool paramEntityRead;
410
HASH_TABLE paramEntities;
411
#endif /* XML_DTD */
412
PREFIX defaultPrefix;
413
/* === scaffolding for building content model === */
414
XML_Bool in_eldecl;
415
CONTENT_SCAFFOLD *scaffold;
416
unsigned contentStringLen;
417
unsigned scaffSize;
418
unsigned scaffCount;
419
int scaffLevel;
420
int *scaffIndex;
421
} DTD;
422
423
enum EntityType {
424
ENTITY_INTERNAL,
425
ENTITY_ATTRIBUTE,
426
ENTITY_VALUE,
427
};
428
429
typedef struct open_internal_entity {
430
const char *internalEventPtr;
431
const char *internalEventEndPtr;
432
struct open_internal_entity *next;
433
ENTITY *entity;
434
int startTagLevel;
435
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
436
enum EntityType type;
437
} OPEN_INTERNAL_ENTITY;
438
439
enum XML_Account {
440
XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
441
XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
442
expansion */
443
XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
444
};
445
446
#if XML_GE == 1
447
typedef unsigned long long XmlBigCount;
448
typedef struct accounting {
449
XmlBigCount countBytesDirect;
450
XmlBigCount countBytesIndirect;
451
unsigned long debugLevel;
452
float maximumAmplificationFactor; // >=1.0
453
unsigned long long activationThresholdBytes;
454
} ACCOUNTING;
455
456
typedef struct MALLOC_TRACKER {
457
XmlBigCount bytesAllocated;
458
XmlBigCount peakBytesAllocated; // updated live only for debug level >=2
459
unsigned long debugLevel;
460
float maximumAmplificationFactor; // >=1.0
461
XmlBigCount activationThresholdBytes;
462
} MALLOC_TRACKER;
463
464
typedef struct entity_stats {
465
unsigned int countEverOpened;
466
unsigned int currentDepth;
467
unsigned int maximumDepthSeen;
468
unsigned long debugLevel;
469
} ENTITY_STATS;
470
#endif /* XML_GE == 1 */
471
472
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
473
const char *end, const char **endPtr);
474
475
static Processor prologProcessor;
476
static Processor prologInitProcessor;
477
static Processor contentProcessor;
478
static Processor cdataSectionProcessor;
479
#ifdef XML_DTD
480
static Processor ignoreSectionProcessor;
481
static Processor externalParEntProcessor;
482
static Processor externalParEntInitProcessor;
483
static Processor entityValueProcessor;
484
static Processor entityValueInitProcessor;
485
#endif /* XML_DTD */
486
static Processor epilogProcessor;
487
static Processor errorProcessor;
488
static Processor externalEntityInitProcessor;
489
static Processor externalEntityInitProcessor2;
490
static Processor externalEntityInitProcessor3;
491
static Processor externalEntityContentProcessor;
492
static Processor internalEntityProcessor;
493
494
static enum XML_Error handleUnknownEncoding(XML_Parser parser,
495
const XML_Char *encodingName);
496
static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
497
const char *s, const char *next);
498
static enum XML_Error initializeEncoding(XML_Parser parser);
499
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
500
const char *s, const char *end, int tok,
501
const char *next, const char **nextPtr,
502
XML_Bool haveMore, XML_Bool allowClosingDoctype,
503
enum XML_Account account);
504
static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
505
XML_Bool betweenDecl, enum EntityType type);
506
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
507
const ENCODING *enc, const char *start,
508
const char *end, const char **endPtr,
509
XML_Bool haveMore, enum XML_Account account);
510
static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
511
const char **startPtr, const char *end,
512
const char **nextPtr, XML_Bool haveMore,
513
enum XML_Account account);
514
#ifdef XML_DTD
515
static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
516
const char **startPtr, const char *end,
517
const char **nextPtr, XML_Bool haveMore);
518
#endif /* XML_DTD */
519
520
static void freeBindings(XML_Parser parser, BINDING *bindings);
521
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
522
const char *attStr, TAG_NAME *tagNamePtr,
523
BINDING **bindingsPtr,
524
enum XML_Account account);
525
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
526
const ATTRIBUTE_ID *attId, const XML_Char *uri,
527
BINDING **bindingsPtr);
528
static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
529
XML_Bool isCdata, XML_Bool isId,
530
const XML_Char *value, XML_Parser parser);
531
static enum XML_Error storeAttributeValue(XML_Parser parser,
532
const ENCODING *enc, XML_Bool isCdata,
533
const char *ptr, const char *end,
534
STRING_POOL *pool,
535
enum XML_Account account);
536
static enum XML_Error
537
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
538
const char *ptr, const char *end, STRING_POOL *pool,
539
enum XML_Account account, const char **nextPtr);
540
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
541
const char *start, const char *end);
542
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
543
#if XML_GE == 1
544
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
545
const char *start, const char *end,
546
enum XML_Account account,
547
const char **nextPtr);
548
static enum XML_Error callStoreEntityValue(XML_Parser parser,
549
const ENCODING *enc,
550
const char *start, const char *end,
551
enum XML_Account account);
552
#else
553
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
554
#endif
555
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
556
const char *start, const char *end);
557
static int reportComment(XML_Parser parser, const ENCODING *enc,
558
const char *start, const char *end);
559
static void reportDefault(XML_Parser parser, const ENCODING *enc,
560
const char *start, const char *end);
561
562
static const XML_Char *getContext(XML_Parser parser);
563
static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
564
565
static void FASTCALL normalizePublicId(XML_Char *s);
566
567
static DTD *dtdCreate(XML_Parser parser);
568
/* do not call if m_parentParser != NULL */
569
static void dtdReset(DTD *p, XML_Parser parser);
570
static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser);
571
static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
572
XML_Parser parser);
573
static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
574
STRING_POOL *newPool, const HASH_TABLE *oldTable);
575
static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
576
size_t createSize);
577
static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser);
578
static void FASTCALL hashTableClear(HASH_TABLE *table);
579
static void FASTCALL hashTableDestroy(HASH_TABLE *table);
580
static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
581
const HASH_TABLE *table);
582
static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
583
584
static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser);
585
static void FASTCALL poolClear(STRING_POOL *pool);
586
static void FASTCALL poolDestroy(STRING_POOL *pool);
587
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
588
const char *ptr, const char *end);
589
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
590
const char *ptr, const char *end);
591
static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
592
static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
593
const XML_Char *s);
594
static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
595
int n);
596
static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
597
const XML_Char *s);
598
599
static int FASTCALL nextScaffoldPart(XML_Parser parser);
600
static XML_Content *build_model(XML_Parser parser);
601
static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
602
const char *ptr, const char *end);
603
604
static XML_Char *copyString(const XML_Char *s, XML_Parser parser);
605
606
static unsigned long generate_hash_secret_salt(XML_Parser parser);
607
static XML_Bool startParsing(XML_Parser parser);
608
609
static XML_Parser parserCreate(const XML_Char *encodingName,
610
const XML_Memory_Handling_Suite *memsuite,
611
const XML_Char *nameSep, DTD *dtd,
612
XML_Parser parentParser);
613
614
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
615
616
#if XML_GE == 1
617
static float accountingGetCurrentAmplification(XML_Parser rootParser);
618
static void accountingReportStats(XML_Parser originParser, const char *epilog);
619
static void accountingOnAbort(XML_Parser originParser);
620
static void accountingReportDiff(XML_Parser rootParser,
621
unsigned int levelsAwayFromRootParser,
622
const char *before, const char *after,
623
ptrdiff_t bytesMore, int source_line,
624
enum XML_Account account);
625
static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
626
const char *before, const char *after,
627
int source_line,
628
enum XML_Account account);
629
630
static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
631
const char *action, int sourceLine);
632
static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
633
int sourceLine);
634
static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
635
int sourceLine);
636
#endif /* XML_GE == 1 */
637
638
static XML_Parser getRootParserOf(XML_Parser parser,
639
unsigned int *outLevelDiff);
640
641
static unsigned long getDebugLevel(const char *variableName,
642
unsigned long defaultDebugLevel);
643
644
/* Accessor macros for the string currently being built in a STRING_POOL.
   'pool' is a pointer to a pool; the in-progress string occupies
   [pool->start, pool->ptr) and pool->end is the limit that triggers
   poolGrow().  Every macro argument is fully parenthesised so that
   expressions such as '&parser->m_tempPool' or '&localPool' expand
   correctly.  NOTE: 'pool' is evaluated more than once by several of
   these macros, so it must not have side effects. */

/* First character of the in-progress string. */
#define poolStart(pool) ((pool)->start)
/* Number of XML_Chars accumulated so far (pointer difference). */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the most recently appended character. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended character (lvalue). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Throw away the in-progress string by rewinding ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the in-progress string: the next string starts at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append character c, calling poolGrow() first when ptr has reached end.
   Evaluates to 1 on success and to 0 if poolGrow() reported failure. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
654
655
#if ! defined(XML_TESTING)
656
const
657
#endif
658
XML_Bool g_reparseDeferralEnabledDefault
659
= XML_TRUE; // write ONLY in runtests.c
660
#if defined(XML_TESTING)
661
unsigned int g_bytesScanned = 0; // used for testing only
662
#endif
663
664
struct XML_ParserStruct {
665
/* The first member must be m_userData so that the XML_GetUserData
666
macro works. */
667
void *m_userData;
668
void *m_handlerArg;
669
670
// How the four parse buffer pointers below relate in time and space:
671
//
672
// m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
673
// | | | |
674
// <--parsed-->| | |
675
// <---parsing--->| |
676
// <--unoccupied-->|
677
// <---------total-malloced/realloced-------->|
678
679
char *m_buffer; // malloc/realloc base pointer of parse buffer
680
const XML_Memory_Handling_Suite m_mem;
681
const char *m_bufferPtr; // first character to be parsed
682
char *m_bufferEnd; // past last character to be parsed
683
const char *m_bufferLim; // allocated end of m_buffer
684
685
XML_Index m_parseEndByteIndex;
686
const char *m_parseEndPtr;
687
size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
688
XML_Bool m_reparseDeferralEnabled;
689
int m_lastBufferRequestSize;
690
XML_Char *m_dataBuf;
691
XML_Char *m_dataBufEnd;
692
XML_StartElementHandler m_startElementHandler;
693
XML_EndElementHandler m_endElementHandler;
694
XML_CharacterDataHandler m_characterDataHandler;
695
XML_ProcessingInstructionHandler m_processingInstructionHandler;
696
XML_CommentHandler m_commentHandler;
697
XML_StartCdataSectionHandler m_startCdataSectionHandler;
698
XML_EndCdataSectionHandler m_endCdataSectionHandler;
699
XML_DefaultHandler m_defaultHandler;
700
XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
701
XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
702
XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
703
XML_NotationDeclHandler m_notationDeclHandler;
704
XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
705
XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
706
XML_NotStandaloneHandler m_notStandaloneHandler;
707
XML_ExternalEntityRefHandler m_externalEntityRefHandler;
708
XML_Parser m_externalEntityRefHandlerArg;
709
XML_SkippedEntityHandler m_skippedEntityHandler;
710
XML_UnknownEncodingHandler m_unknownEncodingHandler;
711
XML_ElementDeclHandler m_elementDeclHandler;
712
XML_AttlistDeclHandler m_attlistDeclHandler;
713
XML_EntityDeclHandler m_entityDeclHandler;
714
XML_XmlDeclHandler m_xmlDeclHandler;
715
const ENCODING *m_encoding;
716
INIT_ENCODING m_initEncoding;
717
const ENCODING *m_internalEncoding;
718
const XML_Char *m_protocolEncodingName;
719
XML_Bool m_ns;
720
XML_Bool m_ns_triplets;
721
void *m_unknownEncodingMem;
722
void *m_unknownEncodingData;
723
void *m_unknownEncodingHandlerData;
724
void(XMLCALL *m_unknownEncodingRelease)(void *);
725
PROLOG_STATE m_prologState;
726
Processor *m_processor;
727
enum XML_Error m_errorCode;
728
const char *m_eventPtr;
729
const char *m_eventEndPtr;
730
const char *m_positionPtr;
731
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
732
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
733
OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
734
OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
735
OPEN_INTERNAL_ENTITY *m_openValueEntities;
736
OPEN_INTERNAL_ENTITY *m_freeValueEntities;
737
XML_Bool m_defaultExpandInternalEntities;
738
int m_tagLevel;
739
ENTITY *m_declEntity;
740
const XML_Char *m_doctypeName;
741
const XML_Char *m_doctypeSysid;
742
const XML_Char *m_doctypePubid;
743
const XML_Char *m_declAttributeType;
744
const XML_Char *m_declNotationName;
745
const XML_Char *m_declNotationPublicId;
746
ELEMENT_TYPE *m_declElementType;
747
ATTRIBUTE_ID *m_declAttributeId;
748
XML_Bool m_declAttributeIsCdata;
749
XML_Bool m_declAttributeIsId;
750
DTD *m_dtd;
751
const XML_Char *m_curBase;
752
TAG *m_tagStack;
753
TAG *m_freeTagList;
754
BINDING *m_inheritedBindings;
755
BINDING *m_freeBindingList;
756
int m_attsSize;
757
int m_nSpecifiedAtts;
758
int m_idAttIndex;
759
ATTRIBUTE *m_atts;
760
NS_ATT *m_nsAtts;
761
unsigned long m_nsAttsVersion;
762
unsigned char m_nsAttsPower;
763
#ifdef XML_ATTR_INFO
764
XML_AttrInfo *m_attInfo;
765
#endif
766
POSITION m_position;
767
STRING_POOL m_tempPool;
768
STRING_POOL m_temp2Pool;
769
char *m_groupConnector;
770
unsigned int m_groupSize;
771
XML_Char m_namespaceSeparator;
772
XML_Parser m_parentParser;
773
XML_ParsingStatus m_parsingStatus;
774
#ifdef XML_DTD
775
XML_Bool m_isParamEntity;
776
XML_Bool m_useForeignDTD;
777
enum XML_ParamEntityParsing m_paramEntityParsing;
778
#endif
779
unsigned long m_hash_secret_salt;
780
#if XML_GE == 1
781
ACCOUNTING m_accounting;
782
MALLOC_TRACKER m_alloc_tracker;
783
ENTITY_STATS m_entity_stats;
784
#endif
785
XML_Bool m_reenter;
786
};
787
788
/* Allocation entry points used throughout this file.

   With XML_GE == 1 every request is routed through expat_malloc(),
   expat_realloc() and expat_free(), which additionally receive the
   calling source line via __LINE__.  Otherwise the parser's own memory
   handling suite (m_mem) is called directly.

   'parser' is parenthesised in both variants so that any pointer
   expression (not just a plain identifier) can be passed. */
#if XML_GE == 1
#  define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__))
#  define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__))
#  define FREE(parser, p) (expat_free((parser), (p), __LINE__))
#else
#  define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#  define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#  define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
#endif
797
798
#if XML_GE == 1
799
static void
800
expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff,
801
XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) {
802
// NOTE: This can be +infinity or -nan
803
const float amplification
804
= (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
805
fprintf(
806
stderr,
807
"expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL(
808
"10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n",
809
(void *)rootParser, rootParser->m_accounting.countBytesDirect, operator,
810
absDiff, newTotal, peakTotal, (double)amplification, sourceLine);
811
}
812
813
static bool
814
expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase,
815
int sourceLine) {
816
assert(rootParser != NULL);
817
assert(increase > 0);
818
819
XmlBigCount newTotal = 0;
820
bool tolerable = true;
821
822
// Detect integer overflow
823
if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) {
824
tolerable = false;
825
} else {
826
newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase;
827
828
if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) {
829
assert(newTotal > 0);
830
// NOTE: This can be +infinity when dividing by zero but not -nan
831
const float amplification
832
= (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
833
if (amplification
834
> rootParser->m_alloc_tracker.maximumAmplificationFactor) {
835
tolerable = false;
836
}
837
}
838
}
839
840
if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) {
841
expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine);
842
}
843
844
return tolerable;
845
}
846
847
# if defined(XML_TESTING)
848
void *
849
# else
850
static void *
851
# endif
852
expat_malloc(XML_Parser parser, size_t size, int sourceLine) {
853
// Detect integer overflow
854
if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) {
855
return NULL;
856
}
857
858
const XML_Parser rootParser = getRootParserOf(parser, NULL);
859
assert(rootParser->m_parentParser == NULL);
860
861
const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size;
862
863
if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
864
< bytesToAllocate) {
865
return NULL; // i.e. signal integer overflow as out-of-memory
866
}
867
868
if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate,
869
sourceLine)) {
870
return NULL; // i.e. signal violation as out-of-memory
871
}
872
873
// Actually allocate
874
void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate);
875
876
if (mallocedPtr == NULL) {
877
return NULL;
878
}
879
880
// Update in-block recorded size
881
*(size_t *)mallocedPtr = size;
882
883
// Update accounting
884
rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate;
885
886
// Report as needed
887
if (rootParser->m_alloc_tracker.debugLevel >= 2) {
888
if (rootParser->m_alloc_tracker.bytesAllocated
889
> rootParser->m_alloc_tracker.peakBytesAllocated) {
890
rootParser->m_alloc_tracker.peakBytesAllocated
891
= rootParser->m_alloc_tracker.bytesAllocated;
892
}
893
expat_heap_stat(rootParser, '+', bytesToAllocate,
894
rootParser->m_alloc_tracker.bytesAllocated,
895
rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
896
}
897
898
return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
899
}
900
901
# if defined(XML_TESTING)
902
void
903
# else
904
static void
905
# endif
906
expat_free(XML_Parser parser, void *ptr, int sourceLine) {
907
assert(parser != NULL);
908
909
if (ptr == NULL) {
910
return;
911
}
912
913
const XML_Parser rootParser = getRootParserOf(parser, NULL);
914
assert(rootParser->m_parentParser == NULL);
915
916
// Extract size (to the eyes of malloc_fcn/realloc_fcn) and
917
// the original pointer returned by malloc/realloc
918
void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
919
const size_t bytesAllocated
920
= sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr;
921
922
// Update accounting
923
assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated);
924
rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated;
925
926
// Report as needed
927
if (rootParser->m_alloc_tracker.debugLevel >= 2) {
928
expat_heap_stat(rootParser, '-', bytesAllocated,
929
rootParser->m_alloc_tracker.bytesAllocated,
930
rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
931
}
932
933
// NOTE: This may be freeing rootParser, so freeing has to come last
934
parser->m_mem.free_fcn(mallocedPtr);
935
}
936
937
# if defined(XML_TESTING)
938
void *
939
# else
940
static void *
941
# endif
942
expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) {
943
assert(parser != NULL);
944
945
if (ptr == NULL) {
946
return expat_malloc(parser, size, sourceLine);
947
}
948
949
if (size == 0) {
950
expat_free(parser, ptr, sourceLine);
951
return NULL;
952
}
953
954
const XML_Parser rootParser = getRootParserOf(parser, NULL);
955
assert(rootParser->m_parentParser == NULL);
956
957
// Extract original size (to the eyes of the caller) and the original
958
// pointer returned by malloc/realloc
959
void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
960
const size_t prevSize = *(size_t *)mallocedPtr;
961
962
// Classify upcoming change
963
const bool isIncrease = (size > prevSize);
964
const size_t absDiff
965
= (size > prevSize) ? (size - prevSize) : (prevSize - size);
966
967
// Ask for permission from accounting
968
if (isIncrease) {
969
if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) {
970
return NULL; // i.e. signal violation as out-of-memory
971
}
972
}
973
974
// NOTE: Integer overflow detection has already been done for us
975
// by expat_heap_increase_tolerable(..) above
976
assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size);
977
978
// Actually allocate
979
mallocedPtr = parser->m_mem.realloc_fcn(
980
mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size);
981
982
if (mallocedPtr == NULL) {
983
return NULL;
984
}
985
986
// Update accounting
987
if (isIncrease) {
988
assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
989
>= absDiff);
990
rootParser->m_alloc_tracker.bytesAllocated += absDiff;
991
} else { // i.e. decrease
992
assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff);
993
rootParser->m_alloc_tracker.bytesAllocated -= absDiff;
994
}
995
996
// Report as needed
997
if (rootParser->m_alloc_tracker.debugLevel >= 2) {
998
if (rootParser->m_alloc_tracker.bytesAllocated
999
> rootParser->m_alloc_tracker.peakBytesAllocated) {
1000
rootParser->m_alloc_tracker.peakBytesAllocated
1001
= rootParser->m_alloc_tracker.bytesAllocated;
1002
}
1003
expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff,
1004
rootParser->m_alloc_tracker.bytesAllocated,
1005
rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
1006
}
1007
1008
// Update in-block recorded size
1009
*(size_t *)mallocedPtr = size;
1010
1011
return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
1012
}
1013
#endif // XML_GE == 1
1014
1015
XML_Parser XMLCALL
1016
XML_ParserCreate(const XML_Char *encodingName) {
1017
return XML_ParserCreate_MM(encodingName, NULL, NULL);
1018
}
1019
1020
XML_Parser XMLCALL
1021
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
1022
XML_Char tmp[2] = {nsSep, 0};
1023
return XML_ParserCreate_MM(encodingName, NULL, tmp);
1024
}
1025
1026
// "xml=http://www.w3.org/XML/1998/namespace"
1027
static const XML_Char implicitContext[]
1028
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
1029
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
1030
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
1031
ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
1032
ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
1033
ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
1034
ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
1035
ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
1036
'\0'};
1037
1038
/* To avoid warnings about unused functions: */
1039
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1040
1041
# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1042
1043
/* Obtain entropy on Linux 3.17+ */
1044
static int
1045
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
1046
int success = 0; /* full count bytes written? */
1047
size_t bytesWrittenTotal = 0;
1048
const unsigned int getrandomFlags = GRND_NONBLOCK;
1049
1050
do {
1051
void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
1052
const size_t bytesToWrite = count - bytesWrittenTotal;
1053
1054
assert(bytesToWrite <= INT_MAX);
1055
1056
const int bytesWrittenMore =
1057
# if defined(HAVE_GETRANDOM)
1058
(int)getrandom(currentTarget, bytesToWrite, getrandomFlags);
1059
# else
1060
(int)syscall(SYS_getrandom, currentTarget, bytesToWrite,
1061
getrandomFlags);
1062
# endif
1063
1064
if (bytesWrittenMore > 0) {
1065
bytesWrittenTotal += bytesWrittenMore;
1066
if (bytesWrittenTotal >= count)
1067
success = 1;
1068
}
1069
} while (! success && (errno == EINTR));
1070
1071
return success;
1072
}
1073
1074
# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
1075
1076
# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1077
1078
/* Extract entropy from /dev/urandom
 *
 * Fills `target` with `count` bytes read from /dev/urandom.
 *
 * Returns 1 when all `count` bytes were read, 0 otherwise (including when
 * the device cannot be opened).  Short reads are continued and reads
 * interrupted by a signal (-1 with errno == EINTR) are retried; end-of-file
 * and any other error give up.  errno is only consulted right after a
 * failed read, never after a successful one where its value would be stale.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue; /* interrupted before any byte arrived: retry */
    }

    break; /* end-of-file or hard error */
  }

  close(fd);
  return success;
}
1105
1106
# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1107
1108
#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1109
1110
#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
1111
1112
/* Fill `target` with `count` bytes taken from arc4random().  Each 32-bit
   result supplies up to four bytes, least significant byte first. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();

    for (size_t byteIdx = 0; (byteIdx < sizeof(word)) && (written < count);
         byteIdx++) {
      out[written++] = (uint8_t)(word >> (byteIdx * 8));
    }
  }
}
1127
1128
#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
1129
1130
#ifdef _WIN32
1131
1132
/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
1133
as it didn't declare it in its header prior to version 5.3.0 of its
1134
runtime package (mingwrt, containing stdlib.h). The upstream fix
1135
was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
1136
# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
1137
&& __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
1138
__declspec(dllimport) int rand_s(unsigned int *);
1139
# endif
1140
1141
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Writes `count` bytes to `target`; every rand_s() result supplies up to
 * sizeof(unsigned int) bytes, least significant byte first.
 * Returns 1 on success and 0 as soon as rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (size_t byteIdx = 0; (byteIdx < sizeof(word)) && (written < count);
         byteIdx++) {
      out[written++] = (uint8_t)(word >> (byteIdx * 8));
    }
  }

  return 1; /* success */
}
1164
1165
#endif /* _WIN32 */
1166
1167
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1168
1169
/* Derive a few bits of entropy from the current time.
 *
 * Windows: XOR of the two halves of the system time as FILETIME.
 * Elsewhere: the microseconds part reported by gettimeofday().
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The result is only checked in builds with assertions enabled */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
1191
1192
#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1193
1194
/* Pass `entropy` through unchanged.  When the debug level obtained for
   "EXPAT_ENTROPY_DEBUG" is at least 1, the value and the name of its
   source (`label`) are also printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  const int hexDigits = (int)sizeof(entropy) * 2;
  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, (unsigned long)sizeof(entropy));
  return entropy;
}
1202
1203
static unsigned long
1204
generate_hash_secret_salt(XML_Parser parser) {
1205
unsigned long entropy;
1206
(void)parser;
1207
1208
/* "Failproof" high quality providers: */
1209
#if defined(HAVE_ARC4RANDOM_BUF)
1210
arc4random_buf(&entropy, sizeof(entropy));
1211
return ENTROPY_DEBUG("arc4random_buf", entropy);
1212
#elif defined(HAVE_ARC4RANDOM)
1213
writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
1214
return ENTROPY_DEBUG("arc4random", entropy);
1215
#else
1216
/* Try high quality providers first .. */
1217
# ifdef _WIN32
1218
if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
1219
return ENTROPY_DEBUG("rand_s", entropy);
1220
}
1221
# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1222
if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
1223
return ENTROPY_DEBUG("getrandom", entropy);
1224
}
1225
# endif
1226
# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1227
if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
1228
return ENTROPY_DEBUG("/dev/urandom", entropy);
1229
}
1230
# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1231
/* .. and self-made low quality for backup: */
1232
1233
/* Process ID is 0 bits entropy if attacker has local access */
1234
entropy = gather_time_entropy() ^ getpid();
1235
1236
/* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1237
if (sizeof(unsigned long) == 4) {
1238
return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
1239
} else {
1240
return ENTROPY_DEBUG("fallback(8)",
1241
entropy * (unsigned long)2305843009213693951ULL);
1242
}
1243
#endif
1244
}
1245
1246
static unsigned long
1247
get_hash_secret_salt(XML_Parser parser) {
1248
const XML_Parser rootParser = getRootParserOf(parser, NULL);
1249
assert(! rootParser->m_parentParser);
1250
1251
return rootParser->m_hash_secret_salt;
1252
}
1253
1254
static enum XML_Error
1255
callProcessor(XML_Parser parser, const char *start, const char *end,
1256
const char **endPtr) {
1257
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1258
1259
if (parser->m_reparseDeferralEnabled
1260
&& ! parser->m_parsingStatus.finalBuffer) {
1261
// Heuristic: don't try to parse a partial token again until the amount of
1262
// available data has increased significantly.
1263
const size_t had_before = parser->m_partialTokenBytesBefore;
1264
// ...but *do* try anyway if we're close to causing a reallocation.
1265
size_t available_buffer
1266
= EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1267
#if XML_CONTEXT_BYTES > 0
1268
available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1269
#endif
1270
available_buffer
1271
+= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1272
// m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1273
const bool enough
1274
= (have_now >= 2 * had_before)
1275
|| ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1276
1277
if (! enough) {
1278
*endPtr = start; // callers may expect this to be set
1279
return XML_ERROR_NONE;
1280
}
1281
}
1282
#if defined(XML_TESTING)
1283
g_bytesScanned += (unsigned)have_now;
1284
#endif
1285
// Run in a loop to eliminate dangerous recursion depths
1286
enum XML_Error ret;
1287
*endPtr = start;
1288
while (1) {
1289
// Use endPtr as the new start in each iteration, since it will
1290
// be set to the next start point by m_processor.
1291
ret = parser->m_processor(parser, *endPtr, end, endPtr);
1292
1293
// Make parsing status (and in particular XML_SUSPENDED) take
1294
// precedence over re-enter flag when they disagree
1295
if (parser->m_parsingStatus.parsing != XML_PARSING) {
1296
parser->m_reenter = XML_FALSE;
1297
}
1298
1299
if (! parser->m_reenter) {
1300
break;
1301
}
1302
1303
parser->m_reenter = XML_FALSE;
1304
if (ret != XML_ERROR_NONE)
1305
return ret;
1306
}
1307
1308
if (ret == XML_ERROR_NONE) {
1309
// if we consumed nothing, remember what we had on this parse attempt.
1310
if (*endPtr == start) {
1311
parser->m_partialTokenBytesBefore = have_now;
1312
} else {
1313
parser->m_partialTokenBytesBefore = 0;
1314
}
1315
}
1316
return ret;
1317
}
1318
1319
static XML_Bool /* only valid for root parser */
1320
startParsing(XML_Parser parser) {
1321
/* hash functions must be initialized before setContext() is called */
1322
if (parser->m_hash_secret_salt == 0)
1323
parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1324
if (parser->m_ns) {
1325
/* implicit context only set for root parser, since child
1326
parsers (i.e. external entity parsers) will inherit it
1327
*/
1328
return setContext(parser, implicitContext);
1329
}
1330
return XML_TRUE;
1331
}
1332
1333
XML_Parser XMLCALL
1334
XML_ParserCreate_MM(const XML_Char *encodingName,
1335
const XML_Memory_Handling_Suite *memsuite,
1336
const XML_Char *nameSep) {
1337
return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
1338
}
1339
1340
static XML_Parser
1341
parserCreate(const XML_Char *encodingName,
1342
const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1343
DTD *dtd, XML_Parser parentParser) {
1344
XML_Parser parser = NULL;
1345
1346
#if XML_GE == 1
1347
const size_t increase
1348
= sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);
1349
1350
if (parentParser != NULL) {
1351
const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
1352
if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
1353
return NULL;
1354
}
1355
}
1356
#else
1357
UNUSED_P(parentParser);
1358
#endif
1359
1360
if (memsuite) {
1361
XML_Memory_Handling_Suite *mtemp;
1362
#if XML_GE == 1
1363
void *const sizeAndParser
1364
= memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
1365
+ sizeof(struct XML_ParserStruct));
1366
if (sizeAndParser != NULL) {
1367
*(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1368
parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
1369
+ EXPAT_MALLOC_PADDING);
1370
#else
1371
parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1372
if (parser != NULL) {
1373
#endif
1374
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1375
mtemp->malloc_fcn = memsuite->malloc_fcn;
1376
mtemp->realloc_fcn = memsuite->realloc_fcn;
1377
mtemp->free_fcn = memsuite->free_fcn;
1378
}
1379
} else {
1380
XML_Memory_Handling_Suite *mtemp;
1381
#if XML_GE == 1
1382
void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
1383
+ sizeof(struct XML_ParserStruct));
1384
if (sizeAndParser != NULL) {
1385
*(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1386
parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
1387
+ EXPAT_MALLOC_PADDING);
1388
#else
1389
parser = malloc(sizeof(struct XML_ParserStruct));
1390
if (parser != NULL) {
1391
#endif
1392
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1393
mtemp->malloc_fcn = malloc;
1394
mtemp->realloc_fcn = realloc;
1395
mtemp->free_fcn = free;
1396
}
1397
} // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0
1398
1399
if (! parser)
1400
return parser;
1401
1402
#if XML_GE == 1
1403
// Initialize .m_alloc_tracker
1404
memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
1405
if (parentParser == NULL) {
1406
parser->m_alloc_tracker.debugLevel
1407
= getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
1408
parser->m_alloc_tracker.maximumAmplificationFactor
1409
= EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
1410
parser->m_alloc_tracker.activationThresholdBytes
1411
= EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;
1412
1413
// NOTE: This initialization needs to come this early because these fields
1414
// are read by allocation tracking code
1415
parser->m_parentParser = NULL;
1416
parser->m_accounting.countBytesDirect = 0;
1417
} else {
1418
parser->m_parentParser = parentParser;
1419
}
1420
1421
// Record XML_ParserStruct allocation we did a few lines up before
1422
const XML_Parser rootParser = getRootParserOf(parser, NULL);
1423
assert(rootParser->m_parentParser == NULL);
1424
assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
1425
rootParser->m_alloc_tracker.bytesAllocated += increase;
1426
1427
// Report on allocation
1428
if (rootParser->m_alloc_tracker.debugLevel >= 2) {
1429
if (rootParser->m_alloc_tracker.bytesAllocated
1430
> rootParser->m_alloc_tracker.peakBytesAllocated) {
1431
rootParser->m_alloc_tracker.peakBytesAllocated
1432
= rootParser->m_alloc_tracker.bytesAllocated;
1433
}
1434
1435
expat_heap_stat(rootParser, '+', increase,
1436
rootParser->m_alloc_tracker.bytesAllocated,
1437
rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
1438
}
1439
#else
1440
parser->m_parentParser = NULL;
1441
#endif // XML_GE == 1
1442
1443
parser->m_buffer = NULL;
1444
parser->m_bufferLim = NULL;
1445
1446
parser->m_attsSize = INIT_ATTS_SIZE;
1447
parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1448
if (parser->m_atts == NULL) {
1449
FREE(parser, parser);
1450
return NULL;
1451
}
1452
#ifdef XML_ATTR_INFO
1453
parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1454
if (parser->m_attInfo == NULL) {
1455
FREE(parser, parser->m_atts);
1456
FREE(parser, parser);
1457
return NULL;
1458
}
1459
#endif
1460
parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1461
if (parser->m_dataBuf == NULL) {
1462
FREE(parser, parser->m_atts);
1463
#ifdef XML_ATTR_INFO
1464
FREE(parser, parser->m_attInfo);
1465
#endif
1466
FREE(parser, parser);
1467
return NULL;
1468
}
1469
parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1470
1471
if (dtd)
1472
parser->m_dtd = dtd;
1473
else {
1474
parser->m_dtd = dtdCreate(parser);
1475
if (parser->m_dtd == NULL) {
1476
FREE(parser, parser->m_dataBuf);
1477
FREE(parser, parser->m_atts);
1478
#ifdef XML_ATTR_INFO
1479
FREE(parser, parser->m_attInfo);
1480
#endif
1481
FREE(parser, parser);
1482
return NULL;
1483
}
1484
}
1485
1486
parser->m_freeBindingList = NULL;
1487
parser->m_freeTagList = NULL;
1488
parser->m_freeInternalEntities = NULL;
1489
parser->m_freeAttributeEntities = NULL;
1490
parser->m_freeValueEntities = NULL;
1491
1492
parser->m_groupSize = 0;
1493
parser->m_groupConnector = NULL;
1494
1495
parser->m_unknownEncodingHandler = NULL;
1496
parser->m_unknownEncodingHandlerData = NULL;
1497
1498
parser->m_namespaceSeparator = ASCII_EXCL;
1499
parser->m_ns = XML_FALSE;
1500
parser->m_ns_triplets = XML_FALSE;
1501
1502
parser->m_nsAtts = NULL;
1503
parser->m_nsAttsVersion = 0;
1504
parser->m_nsAttsPower = 0;
1505
1506
parser->m_protocolEncodingName = NULL;
1507
1508
poolInit(&parser->m_tempPool, parser);
1509
poolInit(&parser->m_temp2Pool, parser);
1510
parserInit(parser, encodingName);
1511
1512
if (encodingName && ! parser->m_protocolEncodingName) {
1513
if (dtd) {
1514
// We need to stop the upcoming call to XML_ParserFree from happily
1515
// destroying parser->m_dtd because the DTD is shared with the parent
1516
// parser and the only guard that keeps XML_ParserFree from destroying
1517
// parser->m_dtd is parser->m_isParamEntity but it will be set to
1518
// XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1519
parser->m_dtd = NULL;
1520
}
1521
XML_ParserFree(parser);
1522
return NULL;
1523
}
1524
1525
if (nameSep) {
1526
parser->m_ns = XML_TRUE;
1527
parser->m_internalEncoding = XmlGetInternalEncodingNS();
1528
parser->m_namespaceSeparator = *nameSep;
1529
} else {
1530
parser->m_internalEncoding = XmlGetInternalEncoding();
1531
}
1532
1533
return parser;
1534
}
1535
1536
static void
1537
parserInit(XML_Parser parser, const XML_Char *encodingName) {
1538
parser->m_processor = prologInitProcessor;
1539
XmlPrologStateInit(&parser->m_prologState);
1540
if (encodingName != NULL) {
1541
parser->m_protocolEncodingName = copyString(encodingName, parser);
1542
}
1543
parser->m_curBase = NULL;
1544
XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1545
parser->m_userData = NULL;
1546
parser->m_handlerArg = NULL;
1547
parser->m_startElementHandler = NULL;
1548
parser->m_endElementHandler = NULL;
1549
parser->m_characterDataHandler = NULL;
1550
parser->m_processingInstructionHandler = NULL;
1551
parser->m_commentHandler = NULL;
1552
parser->m_startCdataSectionHandler = NULL;
1553
parser->m_endCdataSectionHandler = NULL;
1554
parser->m_defaultHandler = NULL;
1555
parser->m_startDoctypeDeclHandler = NULL;
1556
parser->m_endDoctypeDeclHandler = NULL;
1557
parser->m_unparsedEntityDeclHandler = NULL;
1558
parser->m_notationDeclHandler = NULL;
1559
parser->m_startNamespaceDeclHandler = NULL;
1560
parser->m_endNamespaceDeclHandler = NULL;
1561
parser->m_notStandaloneHandler = NULL;
1562
parser->m_externalEntityRefHandler = NULL;
1563
parser->m_externalEntityRefHandlerArg = parser;
1564
parser->m_skippedEntityHandler = NULL;
1565
parser->m_elementDeclHandler = NULL;
1566
parser->m_attlistDeclHandler = NULL;
1567
parser->m_entityDeclHandler = NULL;
1568
parser->m_xmlDeclHandler = NULL;
1569
parser->m_bufferPtr = parser->m_buffer;
1570
parser->m_bufferEnd = parser->m_buffer;
1571
parser->m_parseEndByteIndex = 0;
1572
parser->m_parseEndPtr = NULL;
1573
parser->m_partialTokenBytesBefore = 0;
1574
parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1575
parser->m_lastBufferRequestSize = 0;
1576
parser->m_declElementType = NULL;
1577
parser->m_declAttributeId = NULL;
1578
parser->m_declEntity = NULL;
1579
parser->m_doctypeName = NULL;
1580
parser->m_doctypeSysid = NULL;
1581
parser->m_doctypePubid = NULL;
1582
parser->m_declAttributeType = NULL;
1583
parser->m_declNotationName = NULL;
1584
parser->m_declNotationPublicId = NULL;
1585
parser->m_declAttributeIsCdata = XML_FALSE;
1586
parser->m_declAttributeIsId = XML_FALSE;
1587
memset(&parser->m_position, 0, sizeof(POSITION));
1588
parser->m_errorCode = XML_ERROR_NONE;
1589
parser->m_eventPtr = NULL;
1590
parser->m_eventEndPtr = NULL;
1591
parser->m_positionPtr = NULL;
1592
parser->m_openInternalEntities = NULL;
1593
parser->m_openAttributeEntities = NULL;
1594
parser->m_openValueEntities = NULL;
1595
parser->m_defaultExpandInternalEntities = XML_TRUE;
1596
parser->m_tagLevel = 0;
1597
parser->m_tagStack = NULL;
1598
parser->m_inheritedBindings = NULL;
1599
parser->m_nSpecifiedAtts = 0;
1600
parser->m_unknownEncodingMem = NULL;
1601
parser->m_unknownEncodingRelease = NULL;
1602
parser->m_unknownEncodingData = NULL;
1603
parser->m_parsingStatus.parsing = XML_INITIALIZED;
1604
// Reentry can only be triggered inside m_processor calls
1605
parser->m_reenter = XML_FALSE;
1606
#ifdef XML_DTD
1607
parser->m_isParamEntity = XML_FALSE;
1608
parser->m_useForeignDTD = XML_FALSE;
1609
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1610
#endif
1611
parser->m_hash_secret_salt = 0;
1612
1613
#if XML_GE == 1
1614
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1615
parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1616
parser->m_accounting.maximumAmplificationFactor
1617
= EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1618
parser->m_accounting.activationThresholdBytes
1619
= EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1620
1621
memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1622
parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1623
#endif
1624
}
1625
1626
/* moves list of bindings to m_freeBindingList */
1627
static void FASTCALL
1628
moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1629
while (bindings) {
1630
BINDING *b = bindings;
1631
bindings = bindings->nextTagBinding;
1632
b->nextTagBinding = parser->m_freeBindingList;
1633
parser->m_freeBindingList = b;
1634
}
1635
}
1636
1637
XML_Bool XMLCALL
1638
XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1639
TAG *tStk;
1640
OPEN_INTERNAL_ENTITY *openEntityList;
1641
1642
if (parser == NULL)
1643
return XML_FALSE;
1644
1645
if (parser->m_parentParser)
1646
return XML_FALSE;
1647
/* move m_tagStack to m_freeTagList */
1648
tStk = parser->m_tagStack;
1649
while (tStk) {
1650
TAG *tag = tStk;
1651
tStk = tStk->parent;
1652
tag->parent = parser->m_freeTagList;
1653
moveToFreeBindingList(parser, tag->bindings);
1654
tag->bindings = NULL;
1655
parser->m_freeTagList = tag;
1656
}
1657
/* move m_openInternalEntities to m_freeInternalEntities */
1658
openEntityList = parser->m_openInternalEntities;
1659
while (openEntityList) {
1660
OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1661
openEntityList = openEntity->next;
1662
openEntity->next = parser->m_freeInternalEntities;
1663
parser->m_freeInternalEntities = openEntity;
1664
}
1665
/* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1666
* for attributes) */
1667
openEntityList = parser->m_openAttributeEntities;
1668
while (openEntityList) {
1669
OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1670
openEntityList = openEntity->next;
1671
openEntity->next = parser->m_freeAttributeEntities;
1672
parser->m_freeAttributeEntities = openEntity;
1673
}
1674
/* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1675
* for value entities) */
1676
openEntityList = parser->m_openValueEntities;
1677
while (openEntityList) {
1678
OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1679
openEntityList = openEntity->next;
1680
openEntity->next = parser->m_freeValueEntities;
1681
parser->m_freeValueEntities = openEntity;
1682
}
1683
moveToFreeBindingList(parser, parser->m_inheritedBindings);
1684
FREE(parser, parser->m_unknownEncodingMem);
1685
if (parser->m_unknownEncodingRelease)
1686
parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1687
poolClear(&parser->m_tempPool);
1688
poolClear(&parser->m_temp2Pool);
1689
FREE(parser, (void *)parser->m_protocolEncodingName);
1690
parser->m_protocolEncodingName = NULL;
1691
parserInit(parser, encodingName);
1692
dtdReset(parser->m_dtd, parser);
1693
return XML_TRUE;
1694
}
1695
1696
static XML_Bool
1697
parserBusy(XML_Parser parser) {
1698
switch (parser->m_parsingStatus.parsing) {
1699
case XML_PARSING:
1700
case XML_SUSPENDED:
1701
return XML_TRUE;
1702
case XML_INITIALIZED:
1703
case XML_FINISHED:
1704
default:
1705
return XML_FALSE;
1706
}
1707
}
1708
1709
enum XML_Status XMLCALL
1710
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1711
if (parser == NULL)
1712
return XML_STATUS_ERROR;
1713
/* Block after XML_Parse()/XML_ParseBuffer() has been called.
1714
XXX There's no way for the caller to determine which of the
1715
XXX possible error cases caused the XML_STATUS_ERROR return.
1716
*/
1717
if (parserBusy(parser))
1718
return XML_STATUS_ERROR;
1719
1720
/* Get rid of any previous encoding name */
1721
FREE(parser, (void *)parser->m_protocolEncodingName);
1722
1723
if (encodingName == NULL)
1724
/* No new encoding name */
1725
parser->m_protocolEncodingName = NULL;
1726
else {
1727
/* Copy the new encoding name into allocated memory */
1728
parser->m_protocolEncodingName = copyString(encodingName, parser);
1729
if (! parser->m_protocolEncodingName)
1730
return XML_STATUS_ERROR;
1731
}
1732
return XML_STATUS_OK;
1733
}
1734
1735
XML_Parser XMLCALL
1736
XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1737
const XML_Char *encodingName) {
1738
XML_Parser parser = oldParser;
1739
DTD *newDtd = NULL;
1740
DTD *oldDtd;
1741
XML_StartElementHandler oldStartElementHandler;
1742
XML_EndElementHandler oldEndElementHandler;
1743
XML_CharacterDataHandler oldCharacterDataHandler;
1744
XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1745
XML_CommentHandler oldCommentHandler;
1746
XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1747
XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1748
XML_DefaultHandler oldDefaultHandler;
1749
XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1750
XML_NotationDeclHandler oldNotationDeclHandler;
1751
XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1752
XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1753
XML_NotStandaloneHandler oldNotStandaloneHandler;
1754
XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1755
XML_SkippedEntityHandler oldSkippedEntityHandler;
1756
XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1757
XML_ElementDeclHandler oldElementDeclHandler;
1758
XML_AttlistDeclHandler oldAttlistDeclHandler;
1759
XML_EntityDeclHandler oldEntityDeclHandler;
1760
XML_XmlDeclHandler oldXmlDeclHandler;
1761
ELEMENT_TYPE *oldDeclElementType;
1762
1763
void *oldUserData;
1764
void *oldHandlerArg;
1765
XML_Bool oldDefaultExpandInternalEntities;
1766
XML_Parser oldExternalEntityRefHandlerArg;
1767
#ifdef XML_DTD
1768
enum XML_ParamEntityParsing oldParamEntityParsing;
1769
int oldInEntityValue;
1770
#endif
1771
XML_Bool oldns_triplets;
1772
/* Note that the new parser shares the same hash secret as the old
1773
parser, so that dtdCopy and copyEntityTable can lookup values
1774
from hash tables associated with either parser without us having
1775
to worry which hash secrets each table has.
1776
*/
1777
unsigned long oldhash_secret_salt;
1778
XML_Bool oldReparseDeferralEnabled;
1779
1780
/* Validate the oldParser parameter before we pull everything out of it */
1781
if (oldParser == NULL)
1782
return NULL;
1783
1784
/* Stash the original parser contents on the stack */
1785
oldDtd = parser->m_dtd;
1786
oldStartElementHandler = parser->m_startElementHandler;
1787
oldEndElementHandler = parser->m_endElementHandler;
1788
oldCharacterDataHandler = parser->m_characterDataHandler;
1789
oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1790
oldCommentHandler = parser->m_commentHandler;
1791
oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1792
oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1793
oldDefaultHandler = parser->m_defaultHandler;
1794
oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1795
oldNotationDeclHandler = parser->m_notationDeclHandler;
1796
oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1797
oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1798
oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1799
oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1800
oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1801
oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1802
oldElementDeclHandler = parser->m_elementDeclHandler;
1803
oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1804
oldEntityDeclHandler = parser->m_entityDeclHandler;
1805
oldXmlDeclHandler = parser->m_xmlDeclHandler;
1806
oldDeclElementType = parser->m_declElementType;
1807
1808
oldUserData = parser->m_userData;
1809
oldHandlerArg = parser->m_handlerArg;
1810
oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1811
oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1812
#ifdef XML_DTD
1813
oldParamEntityParsing = parser->m_paramEntityParsing;
1814
oldInEntityValue = parser->m_prologState.inEntityValue;
1815
#endif
1816
oldns_triplets = parser->m_ns_triplets;
1817
/* Note that the new parser shares the same hash secret as the old
1818
parser, so that dtdCopy and copyEntityTable can lookup values
1819
from hash tables associated with either parser without us having
1820
to worry which hash secrets each table has.
1821
*/
1822
oldhash_secret_salt = parser->m_hash_secret_salt;
1823
oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1824
1825
#ifdef XML_DTD
1826
if (! context)
1827
newDtd = oldDtd;
1828
#endif /* XML_DTD */
1829
1830
/* Note that the magical uses of the pre-processor to make field
1831
access look more like C++ require that `parser' be overwritten
1832
here. This makes this function more painful to follow than it
1833
would be otherwise.
1834
*/
1835
if (parser->m_ns) {
1836
XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1837
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
1838
} else {
1839
parser
1840
= parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
1841
}
1842
1843
if (! parser)
1844
return NULL;
1845
1846
parser->m_startElementHandler = oldStartElementHandler;
1847
parser->m_endElementHandler = oldEndElementHandler;
1848
parser->m_characterDataHandler = oldCharacterDataHandler;
1849
parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1850
parser->m_commentHandler = oldCommentHandler;
1851
parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1852
parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1853
parser->m_defaultHandler = oldDefaultHandler;
1854
parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1855
parser->m_notationDeclHandler = oldNotationDeclHandler;
1856
parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1857
parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1858
parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1859
parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1860
parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1861
parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1862
parser->m_elementDeclHandler = oldElementDeclHandler;
1863
parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1864
parser->m_entityDeclHandler = oldEntityDeclHandler;
1865
parser->m_xmlDeclHandler = oldXmlDeclHandler;
1866
parser->m_declElementType = oldDeclElementType;
1867
parser->m_userData = oldUserData;
1868
if (oldUserData == oldHandlerArg)
1869
parser->m_handlerArg = parser->m_userData;
1870
else
1871
parser->m_handlerArg = parser;
1872
if (oldExternalEntityRefHandlerArg != oldParser)
1873
parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1874
parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1875
parser->m_ns_triplets = oldns_triplets;
1876
parser->m_hash_secret_salt = oldhash_secret_salt;
1877
parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1878
parser->m_parentParser = oldParser;
1879
#ifdef XML_DTD
1880
parser->m_paramEntityParsing = oldParamEntityParsing;
1881
parser->m_prologState.inEntityValue = oldInEntityValue;
1882
if (context) {
1883
#endif /* XML_DTD */
1884
if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
1885
|| ! setContext(parser, context)) {
1886
XML_ParserFree(parser);
1887
return NULL;
1888
}
1889
parser->m_processor = externalEntityInitProcessor;
1890
#ifdef XML_DTD
1891
} else {
1892
/* The DTD instance referenced by parser->m_dtd is shared between the
1893
document's root parser and external PE parsers, therefore one does not
1894
need to call setContext. In addition, one also *must* not call
1895
setContext, because this would overwrite existing prefix->binding
1896
pointers in parser->m_dtd with ones that get destroyed with the external
1897
PE parser. This would leave those prefixes with dangling pointers.
1898
*/
1899
parser->m_isParamEntity = XML_TRUE;
1900
XmlPrologStateInitExternalEntity(&parser->m_prologState);
1901
parser->m_processor = externalParEntInitProcessor;
1902
}
1903
#endif /* XML_DTD */
1904
return parser;
1905
}
1906
1907
static void FASTCALL
1908
destroyBindings(BINDING *bindings, XML_Parser parser) {
1909
for (;;) {
1910
BINDING *b = bindings;
1911
if (! b)
1912
break;
1913
bindings = b->nextTagBinding;
1914
FREE(parser, b->uri);
1915
FREE(parser, b);
1916
}
1917
}
1918
1919
void XMLCALL
1920
XML_ParserFree(XML_Parser parser) {
1921
TAG *tagList;
1922
OPEN_INTERNAL_ENTITY *entityList;
1923
if (parser == NULL)
1924
return;
1925
/* free m_tagStack and m_freeTagList */
1926
tagList = parser->m_tagStack;
1927
for (;;) {
1928
TAG *p;
1929
if (tagList == NULL) {
1930
if (parser->m_freeTagList == NULL)
1931
break;
1932
tagList = parser->m_freeTagList;
1933
parser->m_freeTagList = NULL;
1934
}
1935
p = tagList;
1936
tagList = tagList->parent;
1937
FREE(parser, p->buf);
1938
destroyBindings(p->bindings, parser);
1939
FREE(parser, p);
1940
}
1941
/* free m_openInternalEntities and m_freeInternalEntities */
1942
entityList = parser->m_openInternalEntities;
1943
for (;;) {
1944
OPEN_INTERNAL_ENTITY *openEntity;
1945
if (entityList == NULL) {
1946
if (parser->m_freeInternalEntities == NULL)
1947
break;
1948
entityList = parser->m_freeInternalEntities;
1949
parser->m_freeInternalEntities = NULL;
1950
}
1951
openEntity = entityList;
1952
entityList = entityList->next;
1953
FREE(parser, openEntity);
1954
}
1955
/* free m_openAttributeEntities and m_freeAttributeEntities */
1956
entityList = parser->m_openAttributeEntities;
1957
for (;;) {
1958
OPEN_INTERNAL_ENTITY *openEntity;
1959
if (entityList == NULL) {
1960
if (parser->m_freeAttributeEntities == NULL)
1961
break;
1962
entityList = parser->m_freeAttributeEntities;
1963
parser->m_freeAttributeEntities = NULL;
1964
}
1965
openEntity = entityList;
1966
entityList = entityList->next;
1967
FREE(parser, openEntity);
1968
}
1969
/* free m_openValueEntities and m_freeValueEntities */
1970
entityList = parser->m_openValueEntities;
1971
for (;;) {
1972
OPEN_INTERNAL_ENTITY *openEntity;
1973
if (entityList == NULL) {
1974
if (parser->m_freeValueEntities == NULL)
1975
break;
1976
entityList = parser->m_freeValueEntities;
1977
parser->m_freeValueEntities = NULL;
1978
}
1979
openEntity = entityList;
1980
entityList = entityList->next;
1981
FREE(parser, openEntity);
1982
}
1983
destroyBindings(parser->m_freeBindingList, parser);
1984
destroyBindings(parser->m_inheritedBindings, parser);
1985
poolDestroy(&parser->m_tempPool);
1986
poolDestroy(&parser->m_temp2Pool);
1987
FREE(parser, (void *)parser->m_protocolEncodingName);
1988
#ifdef XML_DTD
1989
/* external parameter entity parsers share the DTD structure
1990
parser->m_dtd with the root parser, so we must not destroy it
1991
*/
1992
if (! parser->m_isParamEntity && parser->m_dtd)
1993
#else
1994
if (parser->m_dtd)
1995
#endif /* XML_DTD */
1996
dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser);
1997
FREE(parser, parser->m_atts);
1998
#ifdef XML_ATTR_INFO
1999
FREE(parser, parser->m_attInfo);
2000
#endif
2001
FREE(parser, parser->m_groupConnector);
2002
// NOTE: We are avoiding FREE(..) here because parser->m_buffer
2003
// is not being allocated with MALLOC(..) but with plain
2004
// .malloc_fcn(..).
2005
parser->m_mem.free_fcn(parser->m_buffer);
2006
FREE(parser, parser->m_dataBuf);
2007
FREE(parser, parser->m_nsAtts);
2008
FREE(parser, parser->m_unknownEncodingMem);
2009
if (parser->m_unknownEncodingRelease)
2010
parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
2011
FREE(parser, parser);
2012
}
2013
2014
void XMLCALL
2015
XML_UseParserAsHandlerArg(XML_Parser parser) {
2016
if (parser != NULL)
2017
parser->m_handlerArg = parser;
2018
}
2019
2020
enum XML_Error XMLCALL
2021
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
2022
if (parser == NULL)
2023
return XML_ERROR_INVALID_ARGUMENT;
2024
#ifdef XML_DTD
2025
/* block after XML_Parse()/XML_ParseBuffer() has been called */
2026
if (parserBusy(parser))
2027
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
2028
parser->m_useForeignDTD = useDTD;
2029
return XML_ERROR_NONE;
2030
#else
2031
UNUSED_P(useDTD);
2032
return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
2033
#endif
2034
}
2035
2036
void XMLCALL
2037
XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
2038
if (parser == NULL)
2039
return;
2040
/* block after XML_Parse()/XML_ParseBuffer() has been called */
2041
if (parserBusy(parser))
2042
return;
2043
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
2044
}
2045
2046
void XMLCALL
2047
XML_SetUserData(XML_Parser parser, void *p) {
2048
if (parser == NULL)
2049
return;
2050
if (parser->m_handlerArg == parser->m_userData)
2051
parser->m_handlerArg = parser->m_userData = p;
2052
else
2053
parser->m_userData = p;
2054
}
2055
2056
enum XML_Status XMLCALL
2057
XML_SetBase(XML_Parser parser, const XML_Char *p) {
2058
if (parser == NULL)
2059
return XML_STATUS_ERROR;
2060
if (p) {
2061
p = poolCopyString(&parser->m_dtd->pool, p);
2062
if (! p)
2063
return XML_STATUS_ERROR;
2064
parser->m_curBase = p;
2065
} else
2066
parser->m_curBase = NULL;
2067
return XML_STATUS_OK;
2068
}
2069
2070
const XML_Char *XMLCALL
2071
XML_GetBase(XML_Parser parser) {
2072
if (parser == NULL)
2073
return NULL;
2074
return parser->m_curBase;
2075
}
2076
2077
int XMLCALL
2078
XML_GetSpecifiedAttributeCount(XML_Parser parser) {
2079
if (parser == NULL)
2080
return -1;
2081
return parser->m_nSpecifiedAtts;
2082
}
2083
2084
int XMLCALL
2085
XML_GetIdAttributeIndex(XML_Parser parser) {
2086
if (parser == NULL)
2087
return -1;
2088
return parser->m_idAttIndex;
2089
}
2090
2091
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo array, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
2099
2100
void XMLCALL
2101
XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
2102
XML_EndElementHandler end) {
2103
if (parser == NULL)
2104
return;
2105
parser->m_startElementHandler = start;
2106
parser->m_endElementHandler = end;
2107
}
2108
2109
void XMLCALL
2110
XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
2111
if (parser != NULL)
2112
parser->m_startElementHandler = start;
2113
}
2114
2115
void XMLCALL
2116
XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
2117
if (parser != NULL)
2118
parser->m_endElementHandler = end;
2119
}
2120
2121
void XMLCALL
2122
XML_SetCharacterDataHandler(XML_Parser parser,
2123
XML_CharacterDataHandler handler) {
2124
if (parser != NULL)
2125
parser->m_characterDataHandler = handler;
2126
}
2127
2128
void XMLCALL
2129
XML_SetProcessingInstructionHandler(XML_Parser parser,
2130
XML_ProcessingInstructionHandler handler) {
2131
if (parser != NULL)
2132
parser->m_processingInstructionHandler = handler;
2133
}
2134
2135
void XMLCALL
2136
XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
2137
if (parser != NULL)
2138
parser->m_commentHandler = handler;
2139
}
2140
2141
void XMLCALL
2142
XML_SetCdataSectionHandler(XML_Parser parser,
2143
XML_StartCdataSectionHandler start,
2144
XML_EndCdataSectionHandler end) {
2145
if (parser == NULL)
2146
return;
2147
parser->m_startCdataSectionHandler = start;
2148
parser->m_endCdataSectionHandler = end;
2149
}
2150
2151
void XMLCALL
2152
XML_SetStartCdataSectionHandler(XML_Parser parser,
2153
XML_StartCdataSectionHandler start) {
2154
if (parser != NULL)
2155
parser->m_startCdataSectionHandler = start;
2156
}
2157
2158
void XMLCALL
2159
XML_SetEndCdataSectionHandler(XML_Parser parser,
2160
XML_EndCdataSectionHandler end) {
2161
if (parser != NULL)
2162
parser->m_endCdataSectionHandler = end;
2163
}
2164
2165
void XMLCALL
2166
XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
2167
if (parser == NULL)
2168
return;
2169
parser->m_defaultHandler = handler;
2170
parser->m_defaultExpandInternalEntities = XML_FALSE;
2171
}
2172
2173
void XMLCALL
2174
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
2175
if (parser == NULL)
2176
return;
2177
parser->m_defaultHandler = handler;
2178
parser->m_defaultExpandInternalEntities = XML_TRUE;
2179
}
2180
2181
void XMLCALL
2182
XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
2183
XML_EndDoctypeDeclHandler end) {
2184
if (parser == NULL)
2185
return;
2186
parser->m_startDoctypeDeclHandler = start;
2187
parser->m_endDoctypeDeclHandler = end;
2188
}
2189
2190
void XMLCALL
2191
XML_SetStartDoctypeDeclHandler(XML_Parser parser,
2192
XML_StartDoctypeDeclHandler start) {
2193
if (parser != NULL)
2194
parser->m_startDoctypeDeclHandler = start;
2195
}
2196
2197
void XMLCALL
2198
XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
2199
if (parser != NULL)
2200
parser->m_endDoctypeDeclHandler = end;
2201
}
2202
2203
void XMLCALL
2204
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
2205
XML_UnparsedEntityDeclHandler handler) {
2206
if (parser != NULL)
2207
parser->m_unparsedEntityDeclHandler = handler;
2208
}
2209
2210
void XMLCALL
2211
XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
2212
if (parser != NULL)
2213
parser->m_notationDeclHandler = handler;
2214
}
2215
2216
void XMLCALL
2217
XML_SetNamespaceDeclHandler(XML_Parser parser,
2218
XML_StartNamespaceDeclHandler start,
2219
XML_EndNamespaceDeclHandler end) {
2220
if (parser == NULL)
2221
return;
2222
parser->m_startNamespaceDeclHandler = start;
2223
parser->m_endNamespaceDeclHandler = end;
2224
}
2225
2226
void XMLCALL
2227
XML_SetStartNamespaceDeclHandler(XML_Parser parser,
2228
XML_StartNamespaceDeclHandler start) {
2229
if (parser != NULL)
2230
parser->m_startNamespaceDeclHandler = start;
2231
}
2232
2233
void XMLCALL
2234
XML_SetEndNamespaceDeclHandler(XML_Parser parser,
2235
XML_EndNamespaceDeclHandler end) {
2236
if (parser != NULL)
2237
parser->m_endNamespaceDeclHandler = end;
2238
}
2239
2240
void XMLCALL
2241
XML_SetNotStandaloneHandler(XML_Parser parser,
2242
XML_NotStandaloneHandler handler) {
2243
if (parser != NULL)
2244
parser->m_notStandaloneHandler = handler;
2245
}
2246
2247
void XMLCALL
2248
XML_SetExternalEntityRefHandler(XML_Parser parser,
2249
XML_ExternalEntityRefHandler handler) {
2250
if (parser != NULL)
2251
parser->m_externalEntityRefHandler = handler;
2252
}
2253
2254
void XMLCALL
2255
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
2256
if (parser == NULL)
2257
return;
2258
if (arg)
2259
parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
2260
else
2261
parser->m_externalEntityRefHandlerArg = parser;
2262
}
2263
2264
void XMLCALL
2265
XML_SetSkippedEntityHandler(XML_Parser parser,
2266
XML_SkippedEntityHandler handler) {
2267
if (parser != NULL)
2268
parser->m_skippedEntityHandler = handler;
2269
}
2270
2271
void XMLCALL
2272
XML_SetUnknownEncodingHandler(XML_Parser parser,
2273
XML_UnknownEncodingHandler handler, void *data) {
2274
if (parser == NULL)
2275
return;
2276
parser->m_unknownEncodingHandler = handler;
2277
parser->m_unknownEncodingHandlerData = data;
2278
}
2279
2280
void XMLCALL
2281
XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
2282
if (parser != NULL)
2283
parser->m_elementDeclHandler = eldecl;
2284
}
2285
2286
void XMLCALL
2287
XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
2288
if (parser != NULL)
2289
parser->m_attlistDeclHandler = attdecl;
2290
}
2291
2292
void XMLCALL
2293
XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
2294
if (parser != NULL)
2295
parser->m_entityDeclHandler = handler;
2296
}
2297
2298
void XMLCALL
2299
XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
2300
if (parser != NULL)
2301
parser->m_xmlDeclHandler = handler;
2302
}
2303
2304
int XMLCALL
2305
XML_SetParamEntityParsing(XML_Parser parser,
2306
enum XML_ParamEntityParsing peParsing) {
2307
if (parser == NULL)
2308
return 0;
2309
/* block after XML_Parse()/XML_ParseBuffer() has been called */
2310
if (parserBusy(parser))
2311
return 0;
2312
#ifdef XML_DTD
2313
parser->m_paramEntityParsing = peParsing;
2314
return 1;
2315
#else
2316
return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2317
#endif
2318
}
2319
2320
int XMLCALL
2321
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2322
if (parser == NULL)
2323
return 0;
2324
2325
const XML_Parser rootParser = getRootParserOf(parser, NULL);
2326
assert(! rootParser->m_parentParser);
2327
2328
/* block after XML_Parse()/XML_ParseBuffer() has been called */
2329
if (parserBusy(rootParser))
2330
return 0;
2331
rootParser->m_hash_secret_salt = hash_salt;
2332
return 1;
2333
}
2334
2335
enum XML_Status XMLCALL
2336
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2337
if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2338
if (parser != NULL)
2339
parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2340
return XML_STATUS_ERROR;
2341
}
2342
switch (parser->m_parsingStatus.parsing) {
2343
case XML_SUSPENDED:
2344
parser->m_errorCode = XML_ERROR_SUSPENDED;
2345
return XML_STATUS_ERROR;
2346
case XML_FINISHED:
2347
parser->m_errorCode = XML_ERROR_FINISHED;
2348
return XML_STATUS_ERROR;
2349
case XML_INITIALIZED:
2350
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2351
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2352
return XML_STATUS_ERROR;
2353
}
2354
/* fall through */
2355
default:
2356
parser->m_parsingStatus.parsing = XML_PARSING;
2357
}
2358
2359
#if XML_CONTEXT_BYTES == 0
2360
if (parser->m_bufferPtr == parser->m_bufferEnd) {
2361
const char *end;
2362
int nLeftOver;
2363
enum XML_Status result;
2364
/* Detect overflow (a+b > MAX <==> b > MAX-a) */
2365
if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2366
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2367
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2368
parser->m_processor = errorProcessor;
2369
return XML_STATUS_ERROR;
2370
}
2371
// though this isn't a buffer request, we assume that `len` is the app's
2372
// preferred buffer fill size, and therefore save it here.
2373
parser->m_lastBufferRequestSize = len;
2374
parser->m_parseEndByteIndex += len;
2375
parser->m_positionPtr = s;
2376
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2377
2378
parser->m_errorCode
2379
= callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2380
2381
if (parser->m_errorCode != XML_ERROR_NONE) {
2382
parser->m_eventEndPtr = parser->m_eventPtr;
2383
parser->m_processor = errorProcessor;
2384
return XML_STATUS_ERROR;
2385
} else {
2386
switch (parser->m_parsingStatus.parsing) {
2387
case XML_SUSPENDED:
2388
result = XML_STATUS_SUSPENDED;
2389
break;
2390
case XML_INITIALIZED:
2391
case XML_PARSING:
2392
if (isFinal) {
2393
parser->m_parsingStatus.parsing = XML_FINISHED;
2394
return XML_STATUS_OK;
2395
}
2396
/* fall through */
2397
default:
2398
result = XML_STATUS_OK;
2399
}
2400
}
2401
2402
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2403
&parser->m_position);
2404
nLeftOver = s + len - end;
2405
if (nLeftOver) {
2406
// Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2407
// (and XML_ERROR_FINISHED) from XML_GetBuffer.
2408
const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2409
parser->m_parsingStatus.parsing = XML_PARSING;
2410
void *const temp = XML_GetBuffer(parser, nLeftOver);
2411
parser->m_parsingStatus.parsing = originalStatus;
2412
// GetBuffer may have overwritten this, but we want to remember what the
2413
// app requested, not how many bytes were left over after parsing.
2414
parser->m_lastBufferRequestSize = len;
2415
if (temp == NULL) {
2416
// NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2417
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2418
parser->m_processor = errorProcessor;
2419
return XML_STATUS_ERROR;
2420
}
2421
// Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2422
// don't have any data to preserve, and can copy straight into the start
2423
// of the buffer rather than the GetBuffer return pointer (which may be
2424
// pointing further into the allocated buffer).
2425
memcpy(parser->m_buffer, end, nLeftOver);
2426
}
2427
parser->m_bufferPtr = parser->m_buffer;
2428
parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2429
parser->m_positionPtr = parser->m_bufferPtr;
2430
parser->m_parseEndPtr = parser->m_bufferEnd;
2431
parser->m_eventPtr = parser->m_bufferPtr;
2432
parser->m_eventEndPtr = parser->m_bufferPtr;
2433
return result;
2434
}
2435
#endif /* XML_CONTEXT_BYTES == 0 */
2436
void *buff = XML_GetBuffer(parser, len);
2437
if (buff == NULL)
2438
return XML_STATUS_ERROR;
2439
if (len > 0) {
2440
assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2441
memcpy(buff, s, len);
2442
}
2443
return XML_ParseBuffer(parser, len, isFinal);
2444
}
2445
2446
enum XML_Status XMLCALL
2447
XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2448
const char *start;
2449
enum XML_Status result = XML_STATUS_OK;
2450
2451
if (parser == NULL)
2452
return XML_STATUS_ERROR;
2453
2454
if (len < 0) {
2455
parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2456
return XML_STATUS_ERROR;
2457
}
2458
2459
switch (parser->m_parsingStatus.parsing) {
2460
case XML_SUSPENDED:
2461
parser->m_errorCode = XML_ERROR_SUSPENDED;
2462
return XML_STATUS_ERROR;
2463
case XML_FINISHED:
2464
parser->m_errorCode = XML_ERROR_FINISHED;
2465
return XML_STATUS_ERROR;
2466
case XML_INITIALIZED:
2467
/* Has someone called XML_GetBuffer successfully before? */
2468
if (! parser->m_bufferPtr) {
2469
parser->m_errorCode = XML_ERROR_NO_BUFFER;
2470
return XML_STATUS_ERROR;
2471
}
2472
2473
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2474
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2475
return XML_STATUS_ERROR;
2476
}
2477
/* fall through */
2478
default:
2479
parser->m_parsingStatus.parsing = XML_PARSING;
2480
}
2481
2482
start = parser->m_bufferPtr;
2483
parser->m_positionPtr = start;
2484
parser->m_bufferEnd += len;
2485
parser->m_parseEndPtr = parser->m_bufferEnd;
2486
parser->m_parseEndByteIndex += len;
2487
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2488
2489
parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2490
&parser->m_bufferPtr);
2491
2492
if (parser->m_errorCode != XML_ERROR_NONE) {
2493
parser->m_eventEndPtr = parser->m_eventPtr;
2494
parser->m_processor = errorProcessor;
2495
return XML_STATUS_ERROR;
2496
} else {
2497
switch (parser->m_parsingStatus.parsing) {
2498
case XML_SUSPENDED:
2499
result = XML_STATUS_SUSPENDED;
2500
break;
2501
case XML_INITIALIZED:
2502
case XML_PARSING:
2503
if (isFinal) {
2504
parser->m_parsingStatus.parsing = XML_FINISHED;
2505
return result;
2506
}
2507
default:; /* should not happen */
2508
}
2509
}
2510
2511
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2512
parser->m_bufferPtr, &parser->m_position);
2513
parser->m_positionPtr = parser->m_bufferPtr;
2514
return result;
2515
}
2516
2517
void *XMLCALL
2518
XML_GetBuffer(XML_Parser parser, int len) {
2519
if (parser == NULL)
2520
return NULL;
2521
if (len < 0) {
2522
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2523
return NULL;
2524
}
2525
switch (parser->m_parsingStatus.parsing) {
2526
case XML_SUSPENDED:
2527
parser->m_errorCode = XML_ERROR_SUSPENDED;
2528
return NULL;
2529
case XML_FINISHED:
2530
parser->m_errorCode = XML_ERROR_FINISHED;
2531
return NULL;
2532
default:;
2533
}
2534
2535
// whether or not the request succeeds, `len` seems to be the app's preferred
2536
// buffer fill size; remember it.
2537
parser->m_lastBufferRequestSize = len;
2538
if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2539
|| parser->m_buffer == NULL) {
2540
#if XML_CONTEXT_BYTES > 0
2541
int keep;
2542
#endif /* XML_CONTEXT_BYTES > 0 */
2543
/* Do not invoke signed arithmetic overflow: */
2544
int neededSize = (int)((unsigned)len
2545
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
2546
parser->m_bufferEnd, parser->m_bufferPtr));
2547
if (neededSize < 0) {
2548
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2549
return NULL;
2550
}
2551
#if XML_CONTEXT_BYTES > 0
2552
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2553
if (keep > XML_CONTEXT_BYTES)
2554
keep = XML_CONTEXT_BYTES;
2555
/* Detect and prevent integer overflow */
2556
if (keep > INT_MAX - neededSize) {
2557
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2558
return NULL;
2559
}
2560
neededSize += keep;
2561
#endif /* XML_CONTEXT_BYTES > 0 */
2562
if (parser->m_buffer && parser->m_bufferPtr
2563
&& neededSize
2564
<= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2565
#if XML_CONTEXT_BYTES > 0
2566
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2567
int offset
2568
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2569
- keep;
2570
/* The buffer pointers cannot be NULL here; we have at least some bytes
2571
* in the buffer */
2572
memmove(parser->m_buffer, &parser->m_buffer[offset],
2573
parser->m_bufferEnd - parser->m_bufferPtr + keep);
2574
parser->m_bufferEnd -= offset;
2575
parser->m_bufferPtr -= offset;
2576
}
2577
#else
2578
memmove(parser->m_buffer, parser->m_bufferPtr,
2579
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2580
parser->m_bufferEnd
2581
= parser->m_buffer
2582
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2583
parser->m_bufferPtr = parser->m_buffer;
2584
#endif /* XML_CONTEXT_BYTES > 0 */
2585
} else {
2586
char *newBuf;
2587
int bufferSize
2588
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2589
if (bufferSize == 0)
2590
bufferSize = INIT_BUFFER_SIZE;
2591
do {
2592
/* Do not invoke signed arithmetic overflow: */
2593
bufferSize = (int)(2U * (unsigned)bufferSize);
2594
} while (bufferSize < neededSize && bufferSize > 0);
2595
if (bufferSize <= 0) {
2596
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2597
return NULL;
2598
}
2599
// NOTE: We are avoiding MALLOC(..) here to leave limiting
2600
// the input size to the application using Expat.
2601
newBuf = parser->m_mem.malloc_fcn(bufferSize);
2602
if (newBuf == 0) {
2603
parser->m_errorCode = XML_ERROR_NO_MEMORY;
2604
return NULL;
2605
}
2606
parser->m_bufferLim = newBuf + bufferSize;
2607
#if XML_CONTEXT_BYTES > 0
2608
if (parser->m_bufferPtr) {
2609
memcpy(newBuf, &parser->m_bufferPtr[-keep],
2610
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2611
+ keep);
2612
// NOTE: We are avoiding FREE(..) here because parser->m_buffer
2613
// is not being allocated with MALLOC(..) but with plain
2614
// .malloc_fcn(..).
2615
parser->m_mem.free_fcn(parser->m_buffer);
2616
parser->m_buffer = newBuf;
2617
parser->m_bufferEnd
2618
= parser->m_buffer
2619
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2620
+ keep;
2621
parser->m_bufferPtr = parser->m_buffer + keep;
2622
} else {
2623
/* This must be a brand new buffer with no data in it yet */
2624
parser->m_bufferEnd = newBuf;
2625
parser->m_bufferPtr = parser->m_buffer = newBuf;
2626
}
2627
#else
2628
if (parser->m_bufferPtr) {
2629
memcpy(newBuf, parser->m_bufferPtr,
2630
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2631
// NOTE: We are avoiding FREE(..) here because parser->m_buffer
2632
// is not being allocated with MALLOC(..) but with plain
2633
// .malloc_fcn(..).
2634
parser->m_mem.free_fcn(parser->m_buffer);
2635
parser->m_bufferEnd
2636
= newBuf
2637
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2638
} else {
2639
/* This must be a brand new buffer with no data in it yet */
2640
parser->m_bufferEnd = newBuf;
2641
}
2642
parser->m_bufferPtr = parser->m_buffer = newBuf;
2643
#endif /* XML_CONTEXT_BYTES > 0 */
2644
}
2645
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2646
parser->m_positionPtr = NULL;
2647
}
2648
return parser->m_bufferEnd;
2649
}
2650
2651
static void
2652
triggerReenter(XML_Parser parser) {
2653
parser->m_reenter = XML_TRUE;
2654
}
2655
2656
enum XML_Status XMLCALL
2657
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2658
if (parser == NULL)
2659
return XML_STATUS_ERROR;
2660
switch (parser->m_parsingStatus.parsing) {
2661
case XML_INITIALIZED:
2662
parser->m_errorCode = XML_ERROR_NOT_STARTED;
2663
return XML_STATUS_ERROR;
2664
case XML_SUSPENDED:
2665
if (resumable) {
2666
parser->m_errorCode = XML_ERROR_SUSPENDED;
2667
return XML_STATUS_ERROR;
2668
}
2669
parser->m_parsingStatus.parsing = XML_FINISHED;
2670
break;
2671
case XML_FINISHED:
2672
parser->m_errorCode = XML_ERROR_FINISHED;
2673
return XML_STATUS_ERROR;
2674
case XML_PARSING:
2675
if (resumable) {
2676
#ifdef XML_DTD
2677
if (parser->m_isParamEntity) {
2678
parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2679
return XML_STATUS_ERROR;
2680
}
2681
#endif
2682
parser->m_parsingStatus.parsing = XML_SUSPENDED;
2683
} else
2684
parser->m_parsingStatus.parsing = XML_FINISHED;
2685
break;
2686
default:
2687
assert(0);
2688
}
2689
return XML_STATUS_OK;
2690
}
2691
2692
enum XML_Status XMLCALL
2693
XML_ResumeParser(XML_Parser parser) {
2694
enum XML_Status result = XML_STATUS_OK;
2695
2696
if (parser == NULL)
2697
return XML_STATUS_ERROR;
2698
if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2699
parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2700
return XML_STATUS_ERROR;
2701
}
2702
parser->m_parsingStatus.parsing = XML_PARSING;
2703
2704
parser->m_errorCode = callProcessor(
2705
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2706
2707
if (parser->m_errorCode != XML_ERROR_NONE) {
2708
parser->m_eventEndPtr = parser->m_eventPtr;
2709
parser->m_processor = errorProcessor;
2710
return XML_STATUS_ERROR;
2711
} else {
2712
switch (parser->m_parsingStatus.parsing) {
2713
case XML_SUSPENDED:
2714
result = XML_STATUS_SUSPENDED;
2715
break;
2716
case XML_INITIALIZED:
2717
case XML_PARSING:
2718
if (parser->m_parsingStatus.finalBuffer) {
2719
parser->m_parsingStatus.parsing = XML_FINISHED;
2720
return result;
2721
}
2722
default:;
2723
}
2724
}
2725
2726
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2727
parser->m_bufferPtr, &parser->m_position);
2728
parser->m_positionPtr = parser->m_bufferPtr;
2729
return result;
2730
}
2731
2732
void XMLCALL
2733
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2734
if (parser == NULL)
2735
return;
2736
assert(status != NULL);
2737
*status = parser->m_parsingStatus;
2738
}
2739
2740
enum XML_Error XMLCALL
2741
XML_GetErrorCode(XML_Parser parser) {
2742
if (parser == NULL)
2743
return XML_ERROR_INVALID_ARGUMENT;
2744
return parser->m_errorCode;
2745
}
2746
2747
XML_Index XMLCALL
2748
XML_GetCurrentByteIndex(XML_Parser parser) {
2749
if (parser == NULL)
2750
return -1;
2751
if (parser->m_eventPtr)
2752
return (XML_Index)(parser->m_parseEndByteIndex
2753
- (parser->m_parseEndPtr - parser->m_eventPtr));
2754
return -1;
2755
}
2756
2757
int XMLCALL
2758
XML_GetCurrentByteCount(XML_Parser parser) {
2759
if (parser == NULL)
2760
return 0;
2761
if (parser->m_eventEndPtr && parser->m_eventPtr)
2762
return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2763
return 0;
2764
}
2765
2766
const char *XMLCALL
2767
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2768
#if XML_CONTEXT_BYTES > 0
2769
if (parser == NULL)
2770
return NULL;
2771
if (parser->m_eventPtr && parser->m_buffer) {
2772
if (offset != NULL)
2773
*offset = (int)(parser->m_eventPtr - parser->m_buffer);
2774
if (size != NULL)
2775
*size = (int)(parser->m_bufferEnd - parser->m_buffer);
2776
return parser->m_buffer;
2777
}
2778
#else
2779
(void)parser;
2780
(void)offset;
2781
(void)size;
2782
#endif /* XML_CONTEXT_BYTES > 0 */
2783
return (const char *)0;
2784
}
2785
2786
XML_Size XMLCALL
2787
XML_GetCurrentLineNumber(XML_Parser parser) {
2788
if (parser == NULL)
2789
return 0;
2790
if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2791
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2792
parser->m_eventPtr, &parser->m_position);
2793
parser->m_positionPtr = parser->m_eventPtr;
2794
}
2795
return parser->m_position.lineNumber + 1;
2796
}
2797
2798
XML_Size XMLCALL
2799
XML_GetCurrentColumnNumber(XML_Parser parser) {
2800
if (parser == NULL)
2801
return 0;
2802
if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2803
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2804
parser->m_eventPtr, &parser->m_position);
2805
parser->m_positionPtr = parser->m_eventPtr;
2806
}
2807
return parser->m_position.columnNumber;
2808
}
2809
2810
void XMLCALL
2811
XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2812
if (parser == NULL)
2813
return;
2814
2815
// NOTE: We are avoiding FREE(..) here because the content model
2816
// has been created using plain .malloc_fcn(..) rather than MALLOC(..).
2817
parser->m_mem.free_fcn(model);
2818
}
2819
2820
void *XMLCALL
2821
XML_MemMalloc(XML_Parser parser, size_t size) {
2822
if (parser == NULL)
2823
return NULL;
2824
2825
// NOTE: We are avoiding MALLOC(..) here to not include
2826
// user allocations with allocation tracking and limiting.
2827
return parser->m_mem.malloc_fcn(size);
2828
}
2829
2830
void *XMLCALL
2831
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2832
if (parser == NULL)
2833
return NULL;
2834
2835
// NOTE: We are avoiding REALLOC(..) here to not include
2836
// user allocations with allocation tracking and limiting.
2837
return parser->m_mem.realloc_fcn(ptr, size);
2838
}
2839
2840
void XMLCALL
2841
XML_MemFree(XML_Parser parser, void *ptr) {
2842
if (parser == NULL)
2843
return;
2844
2845
// NOTE: We are avoiding FREE(..) here because XML_MemMalloc and
2846
// XML_MemRealloc are not using MALLOC(..) and REALLOC(..)
2847
// but plain .malloc_fcn(..) and .realloc_fcn(..), internally.
2848
parser->m_mem.free_fcn(ptr);
2849
}
2850
2851
void XMLCALL
2852
XML_DefaultCurrent(XML_Parser parser) {
2853
if (parser == NULL)
2854
return;
2855
if (parser->m_defaultHandler) {
2856
if (parser->m_openInternalEntities)
2857
reportDefault(parser, parser->m_internalEncoding,
2858
parser->m_openInternalEntities->internalEventPtr,
2859
parser->m_openInternalEntities->internalEventEndPtr);
2860
else
2861
reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2862
parser->m_eventEndPtr);
2863
}
2864
}
2865
2866
const XML_LChar *XMLCALL
2867
XML_ErrorString(enum XML_Error code) {
2868
switch (code) {
2869
case XML_ERROR_NONE:
2870
return NULL;
2871
case XML_ERROR_NO_MEMORY:
2872
return XML_L("out of memory");
2873
case XML_ERROR_SYNTAX:
2874
return XML_L("syntax error");
2875
case XML_ERROR_NO_ELEMENTS:
2876
return XML_L("no element found");
2877
case XML_ERROR_INVALID_TOKEN:
2878
return XML_L("not well-formed (invalid token)");
2879
case XML_ERROR_UNCLOSED_TOKEN:
2880
return XML_L("unclosed token");
2881
case XML_ERROR_PARTIAL_CHAR:
2882
return XML_L("partial character");
2883
case XML_ERROR_TAG_MISMATCH:
2884
return XML_L("mismatched tag");
2885
case XML_ERROR_DUPLICATE_ATTRIBUTE:
2886
return XML_L("duplicate attribute");
2887
case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2888
return XML_L("junk after document element");
2889
case XML_ERROR_PARAM_ENTITY_REF:
2890
return XML_L("illegal parameter entity reference");
2891
case XML_ERROR_UNDEFINED_ENTITY:
2892
return XML_L("undefined entity");
2893
case XML_ERROR_RECURSIVE_ENTITY_REF:
2894
return XML_L("recursive entity reference");
2895
case XML_ERROR_ASYNC_ENTITY:
2896
return XML_L("asynchronous entity");
2897
case XML_ERROR_BAD_CHAR_REF:
2898
return XML_L("reference to invalid character number");
2899
case XML_ERROR_BINARY_ENTITY_REF:
2900
return XML_L("reference to binary entity");
2901
case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2902
return XML_L("reference to external entity in attribute");
2903
case XML_ERROR_MISPLACED_XML_PI:
2904
return XML_L("XML or text declaration not at start of entity");
2905
case XML_ERROR_UNKNOWN_ENCODING:
2906
return XML_L("unknown encoding");
2907
case XML_ERROR_INCORRECT_ENCODING:
2908
return XML_L("encoding specified in XML declaration is incorrect");
2909
case XML_ERROR_UNCLOSED_CDATA_SECTION:
2910
return XML_L("unclosed CDATA section");
2911
case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2912
return XML_L("error in processing external entity reference");
2913
case XML_ERROR_NOT_STANDALONE:
2914
return XML_L("document is not standalone");
2915
case XML_ERROR_UNEXPECTED_STATE:
2916
return XML_L("unexpected parser state - please send a bug report");
2917
case XML_ERROR_ENTITY_DECLARED_IN_PE:
2918
return XML_L("entity declared in parameter entity");
2919
case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2920
return XML_L("requested feature requires XML_DTD support in Expat");
2921
case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2922
return XML_L("cannot change setting once parsing has begun");
2923
/* Added in 1.95.7. */
2924
case XML_ERROR_UNBOUND_PREFIX:
2925
return XML_L("unbound prefix");
2926
/* Added in 1.95.8. */
2927
case XML_ERROR_UNDECLARING_PREFIX:
2928
return XML_L("must not undeclare prefix");
2929
case XML_ERROR_INCOMPLETE_PE:
2930
return XML_L("incomplete markup in parameter entity");
2931
case XML_ERROR_XML_DECL:
2932
return XML_L("XML declaration not well-formed");
2933
case XML_ERROR_TEXT_DECL:
2934
return XML_L("text declaration not well-formed");
2935
case XML_ERROR_PUBLICID:
2936
return XML_L("illegal character(s) in public id");
2937
case XML_ERROR_SUSPENDED:
2938
return XML_L("parser suspended");
2939
case XML_ERROR_NOT_SUSPENDED:
2940
return XML_L("parser not suspended");
2941
case XML_ERROR_ABORTED:
2942
return XML_L("parsing aborted");
2943
case XML_ERROR_FINISHED:
2944
return XML_L("parsing finished");
2945
case XML_ERROR_SUSPEND_PE:
2946
return XML_L("cannot suspend in external parameter entity");
2947
/* Added in 2.0.0. */
2948
case XML_ERROR_RESERVED_PREFIX_XML:
2949
return XML_L(
2950
"reserved prefix (xml) must not be undeclared or bound to another namespace name");
2951
case XML_ERROR_RESERVED_PREFIX_XMLNS:
2952
return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2953
case XML_ERROR_RESERVED_NAMESPACE_URI:
2954
return XML_L(
2955
"prefix must not be bound to one of the reserved namespace names");
2956
/* Added in 2.2.5. */
2957
case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2958
return XML_L("invalid argument");
2959
/* Added in 2.3.0. */
2960
case XML_ERROR_NO_BUFFER:
2961
return XML_L(
2962
"a successful prior call to function XML_GetBuffer is required");
2963
/* Added in 2.4.0. */
2964
case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2965
return XML_L(
2966
"limit on input amplification factor (from DTD and entities) breached");
2967
/* Added in 2.6.4. */
2968
case XML_ERROR_NOT_STARTED:
2969
return XML_L("parser not started");
2970
}
2971
return NULL;
2972
}
2973
2974
const XML_LChar *XMLCALL
2975
XML_ExpatVersion(void) {
2976
/* V1 is used to string-ize the version number. However, it would
2977
string-ize the actual version macro *names* unless we get them
2978
substituted before being passed to V1. CPP is defined to expand
2979
a macro, then rescan for more expansions. Thus, we use V2 to expand
2980
the version macros, then CPP will expand the resulting V1() macro
2981
with the correct numerals. */
2982
/* ### I'm assuming cpp is portable in this respect... */
2983
2984
#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2985
#define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2986
2987
return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2988
2989
#undef V1
2990
#undef V2
2991
}
2992
2993
XML_Expat_Version XMLCALL
2994
XML_ExpatVersionInfo(void) {
2995
XML_Expat_Version version;
2996
2997
version.major = XML_MAJOR_VERSION;
2998
version.minor = XML_MINOR_VERSION;
2999
version.micro = XML_MICRO_VERSION;
3000
3001
return version;
3002
}
3003
3004
const XML_Feature *XMLCALL
3005
XML_GetFeatureList(void) {
3006
static const XML_Feature features[] = {
3007
{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
3008
sizeof(XML_Char)},
3009
{XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
3010
sizeof(XML_LChar)},
3011
#ifdef XML_UNICODE
3012
{XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
3013
#endif
3014
#ifdef XML_UNICODE_WCHAR_T
3015
{XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
3016
#endif
3017
#ifdef XML_DTD
3018
{XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
3019
#endif
3020
#if XML_CONTEXT_BYTES > 0
3021
{XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
3022
XML_CONTEXT_BYTES},
3023
#endif
3024
#ifdef XML_MIN_SIZE
3025
{XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
3026
#endif
3027
#ifdef XML_NS
3028
{XML_FEATURE_NS, XML_L("XML_NS"), 0},
3029
#endif
3030
#ifdef XML_LARGE_SIZE
3031
{XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
3032
#endif
3033
#ifdef XML_ATTR_INFO
3034
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
3035
#endif
3036
#if XML_GE == 1
3037
/* Added in Expat 2.4.0 for XML_DTD defined and
3038
* added in Expat 2.6.0 for XML_GE == 1. */
3039
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
3040
XML_L("XML_BLAP_MAX_AMP"),
3041
(long int)
3042
EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
3043
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
3044
XML_L("XML_BLAP_ACT_THRES"),
3045
EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
3046
/* Added in Expat 2.6.0. */
3047
{XML_FEATURE_GE, XML_L("XML_GE"), 0},
3048
/* Added in Expat 2.7.2. */
3049
{XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
3050
XML_L("XML_AT_MAX_AMP"),
3051
(long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
3052
{XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
3053
XML_L("XML_AT_ACT_THRES"),
3054
(long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
3055
#endif
3056
{XML_FEATURE_END, NULL, 0}};
3057
3058
return features;
3059
}
3060
3061
#if XML_GE == 1
3062
XML_Bool XMLCALL
3063
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
3064
XML_Parser parser, float maximumAmplificationFactor) {
3065
if ((parser == NULL) || (parser->m_parentParser != NULL)
3066
|| isnan(maximumAmplificationFactor)
3067
|| (maximumAmplificationFactor < 1.0f)) {
3068
return XML_FALSE;
3069
}
3070
parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
3071
return XML_TRUE;
3072
}
3073
3074
XML_Bool XMLCALL
3075
XML_SetBillionLaughsAttackProtectionActivationThreshold(
3076
XML_Parser parser, unsigned long long activationThresholdBytes) {
3077
if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3078
return XML_FALSE;
3079
}
3080
parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
3081
return XML_TRUE;
3082
}
3083
3084
XML_Bool XMLCALL
3085
XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
3086
float maximumAmplificationFactor) {
3087
if ((parser == NULL) || (parser->m_parentParser != NULL)
3088
|| isnan(maximumAmplificationFactor)
3089
|| (maximumAmplificationFactor < 1.0f)) {
3090
return XML_FALSE;
3091
}
3092
parser->m_alloc_tracker.maximumAmplificationFactor
3093
= maximumAmplificationFactor;
3094
return XML_TRUE;
3095
}
3096
3097
XML_Bool XMLCALL
3098
XML_SetAllocTrackerActivationThreshold(
3099
XML_Parser parser, unsigned long long activationThresholdBytes) {
3100
if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3101
return XML_FALSE;
3102
}
3103
parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
3104
return XML_TRUE;
3105
}
3106
#endif /* XML_GE == 1 */
3107
3108
XML_Bool XMLCALL
3109
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
3110
if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
3111
parser->m_reparseDeferralEnabled = enabled;
3112
return XML_TRUE;
3113
}
3114
return XML_FALSE;
3115
}
3116
3117
/* Initially tag->rawName always points into the parse buffer;
3118
for those TAG instances opened while the current parse buffer was
3119
processed, and not yet closed, we need to store tag->rawName in a more
3120
permanent location, since the parse buffer is about to be discarded.
3121
*/
3122
static XML_Bool
3123
storeRawNames(XML_Parser parser) {
3124
TAG *tag = parser->m_tagStack;
3125
while (tag) {
3126
size_t bufSize;
3127
size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
3128
size_t rawNameLen;
3129
char *rawNameBuf = tag->buf + nameLen;
3130
/* Stop if already stored. Since m_tagStack is a stack, we can stop
3131
at the first entry that has already been copied; everything
3132
below it in the stack is already been accounted for in a
3133
previous call to this function.
3134
*/
3135
if (tag->rawName == rawNameBuf)
3136
break;
3137
/* For reuse purposes we need to ensure that the
3138
size of tag->buf is a multiple of sizeof(XML_Char).
3139
*/
3140
rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
3141
/* Detect and prevent integer overflow. */
3142
if (rawNameLen > (size_t)INT_MAX - nameLen)
3143
return XML_FALSE;
3144
bufSize = nameLen + rawNameLen;
3145
if (bufSize > (size_t)(tag->bufEnd - tag->buf)) {
3146
char *temp = REALLOC(parser, tag->buf, bufSize);
3147
if (temp == NULL)
3148
return XML_FALSE;
3149
/* if tag->name.str points to tag->buf (only when namespace
3150
processing is off) then we have to update it
3151
*/
3152
if (tag->name.str == (XML_Char *)tag->buf)
3153
tag->name.str = (XML_Char *)temp;
3154
/* if tag->name.localPart is set (when namespace processing is on)
3155
then update it as well, since it will always point into tag->buf
3156
*/
3157
if (tag->name.localPart)
3158
tag->name.localPart
3159
= (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
3160
tag->buf = temp;
3161
tag->bufEnd = temp + bufSize;
3162
rawNameBuf = temp + nameLen;
3163
}
3164
memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
3165
tag->rawName = rawNameBuf;
3166
tag = tag->parent;
3167
}
3168
return XML_TRUE;
3169
}
3170
3171
static enum XML_Error PTRCALL
3172
contentProcessor(XML_Parser parser, const char *start, const char *end,
3173
const char **endPtr) {
3174
enum XML_Error result = doContent(
3175
parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
3176
endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3177
XML_ACCOUNT_DIRECT);
3178
if (result == XML_ERROR_NONE) {
3179
if (! storeRawNames(parser))
3180
return XML_ERROR_NO_MEMORY;
3181
}
3182
return result;
3183
}
3184
3185
static enum XML_Error PTRCALL
3186
externalEntityInitProcessor(XML_Parser parser, const char *start,
3187
const char *end, const char **endPtr) {
3188
enum XML_Error result = initializeEncoding(parser);
3189
if (result != XML_ERROR_NONE)
3190
return result;
3191
parser->m_processor = externalEntityInitProcessor2;
3192
return externalEntityInitProcessor2(parser, start, end, endPtr);
3193
}
3194
3195
static enum XML_Error PTRCALL
3196
externalEntityInitProcessor2(XML_Parser parser, const char *start,
3197
const char *end, const char **endPtr) {
3198
const char *next = start; /* XmlContentTok doesn't always set the last arg */
3199
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
3200
switch (tok) {
3201
case XML_TOK_BOM:
3202
#if XML_GE == 1
3203
if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
3204
XML_ACCOUNT_DIRECT)) {
3205
accountingOnAbort(parser);
3206
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3207
}
3208
#endif /* XML_GE == 1 */
3209
3210
/* If we are at the end of the buffer, this would cause the next stage,
3211
i.e. externalEntityInitProcessor3, to pass control directly to
3212
doContent (by detecting XML_TOK_NONE) without processing any xml text
3213
declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
3214
*/
3215
if (next == end && ! parser->m_parsingStatus.finalBuffer) {
3216
*endPtr = next;
3217
return XML_ERROR_NONE;
3218
}
3219
start = next;
3220
break;
3221
case XML_TOK_PARTIAL:
3222
if (! parser->m_parsingStatus.finalBuffer) {
3223
*endPtr = start;
3224
return XML_ERROR_NONE;
3225
}
3226
parser->m_eventPtr = start;
3227
return XML_ERROR_UNCLOSED_TOKEN;
3228
case XML_TOK_PARTIAL_CHAR:
3229
if (! parser->m_parsingStatus.finalBuffer) {
3230
*endPtr = start;
3231
return XML_ERROR_NONE;
3232
}
3233
parser->m_eventPtr = start;
3234
return XML_ERROR_PARTIAL_CHAR;
3235
}
3236
parser->m_processor = externalEntityInitProcessor3;
3237
return externalEntityInitProcessor3(parser, start, end, endPtr);
3238
}
3239
3240
static enum XML_Error PTRCALL
3241
externalEntityInitProcessor3(XML_Parser parser, const char *start,
3242
const char *end, const char **endPtr) {
3243
int tok;
3244
const char *next = start; /* XmlContentTok doesn't always set the last arg */
3245
parser->m_eventPtr = start;
3246
tok = XmlContentTok(parser->m_encoding, start, end, &next);
3247
/* Note: These bytes are accounted later in:
3248
- processXmlDecl
3249
- externalEntityContentProcessor
3250
*/
3251
parser->m_eventEndPtr = next;
3252
3253
switch (tok) {
3254
case XML_TOK_XML_DECL: {
3255
enum XML_Error result;
3256
result = processXmlDecl(parser, 1, start, next);
3257
if (result != XML_ERROR_NONE)
3258
return result;
3259
switch (parser->m_parsingStatus.parsing) {
3260
case XML_SUSPENDED:
3261
*endPtr = next;
3262
return XML_ERROR_NONE;
3263
case XML_FINISHED:
3264
return XML_ERROR_ABORTED;
3265
case XML_PARSING:
3266
if (parser->m_reenter) {
3267
return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
3268
}
3269
/* Fall through */
3270
default:
3271
start = next;
3272
}
3273
} break;
3274
case XML_TOK_PARTIAL:
3275
if (! parser->m_parsingStatus.finalBuffer) {
3276
*endPtr = start;
3277
return XML_ERROR_NONE;
3278
}
3279
return XML_ERROR_UNCLOSED_TOKEN;
3280
case XML_TOK_PARTIAL_CHAR:
3281
if (! parser->m_parsingStatus.finalBuffer) {
3282
*endPtr = start;
3283
return XML_ERROR_NONE;
3284
}
3285
return XML_ERROR_PARTIAL_CHAR;
3286
}
3287
parser->m_processor = externalEntityContentProcessor;
3288
parser->m_tagLevel = 1;
3289
return externalEntityContentProcessor(parser, start, end, endPtr);
3290
}
3291
3292
static enum XML_Error PTRCALL
3293
externalEntityContentProcessor(XML_Parser parser, const char *start,
3294
const char *end, const char **endPtr) {
3295
enum XML_Error result
3296
= doContent(parser, 1, parser->m_encoding, start, end, endPtr,
3297
(XML_Bool)! parser->m_parsingStatus.finalBuffer,
3298
XML_ACCOUNT_ENTITY_EXPANSION);
3299
if (result == XML_ERROR_NONE) {
3300
if (! storeRawNames(parser))
3301
return XML_ERROR_NO_MEMORY;
3302
}
3303
return result;
3304
}
3305
3306
static enum XML_Error
3307
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
3308
const char *s, const char *end, const char **nextPtr,
3309
XML_Bool haveMore, enum XML_Account account) {
3310
/* save one level of indirection */
3311
DTD *const dtd = parser->m_dtd;
3312
3313
const char **eventPP;
3314
const char **eventEndPP;
3315
if (enc == parser->m_encoding) {
3316
eventPP = &parser->m_eventPtr;
3317
eventEndPP = &parser->m_eventEndPtr;
3318
} else {
3319
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3320
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3321
}
3322
*eventPP = s;
3323
3324
for (;;) {
3325
const char *next = s; /* XmlContentTok doesn't always set the last arg */
3326
int tok = XmlContentTok(enc, s, end, &next);
3327
#if XML_GE == 1
3328
const char *accountAfter
3329
= ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
3330
? (haveMore ? s /* i.e. 0 bytes */ : end)
3331
: next;
3332
if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
3333
account)) {
3334
accountingOnAbort(parser);
3335
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3336
}
3337
#endif
3338
*eventEndPP = next;
3339
switch (tok) {
3340
case XML_TOK_TRAILING_CR:
3341
if (haveMore) {
3342
*nextPtr = s;
3343
return XML_ERROR_NONE;
3344
}
3345
*eventEndPP = end;
3346
if (parser->m_characterDataHandler) {
3347
XML_Char c = 0xA;
3348
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3349
} else if (parser->m_defaultHandler)
3350
reportDefault(parser, enc, s, end);
3351
/* We are at the end of the final buffer, should we check for
3352
XML_SUSPENDED, XML_FINISHED?
3353
*/
3354
if (startTagLevel == 0)
3355
return XML_ERROR_NO_ELEMENTS;
3356
if (parser->m_tagLevel != startTagLevel)
3357
return XML_ERROR_ASYNC_ENTITY;
3358
*nextPtr = end;
3359
return XML_ERROR_NONE;
3360
case XML_TOK_NONE:
3361
if (haveMore) {
3362
*nextPtr = s;
3363
return XML_ERROR_NONE;
3364
}
3365
if (startTagLevel > 0) {
3366
if (parser->m_tagLevel != startTagLevel)
3367
return XML_ERROR_ASYNC_ENTITY;
3368
*nextPtr = s;
3369
return XML_ERROR_NONE;
3370
}
3371
return XML_ERROR_NO_ELEMENTS;
3372
case XML_TOK_INVALID:
3373
*eventPP = next;
3374
return XML_ERROR_INVALID_TOKEN;
3375
case XML_TOK_PARTIAL:
3376
if (haveMore) {
3377
*nextPtr = s;
3378
return XML_ERROR_NONE;
3379
}
3380
return XML_ERROR_UNCLOSED_TOKEN;
3381
case XML_TOK_PARTIAL_CHAR:
3382
if (haveMore) {
3383
*nextPtr = s;
3384
return XML_ERROR_NONE;
3385
}
3386
return XML_ERROR_PARTIAL_CHAR;
3387
case XML_TOK_ENTITY_REF: {
3388
const XML_Char *name;
3389
ENTITY *entity;
3390
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3391
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3392
if (ch) {
3393
#if XML_GE == 1
3394
/* NOTE: We are replacing 4-6 characters original input for 1 character
3395
* so there is no amplification and hence recording without
3396
* protection. */
3397
accountingDiffTolerated(parser, tok, (char *)&ch,
3398
((char *)&ch) + sizeof(XML_Char), __LINE__,
3399
XML_ACCOUNT_ENTITY_EXPANSION);
3400
#endif /* XML_GE == 1 */
3401
if (parser->m_characterDataHandler)
3402
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3403
else if (parser->m_defaultHandler)
3404
reportDefault(parser, enc, s, next);
3405
break;
3406
}
3407
name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3408
next - enc->minBytesPerChar);
3409
if (! name)
3410
return XML_ERROR_NO_MEMORY;
3411
entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3412
poolDiscard(&dtd->pool);
3413
/* First, determine if a check for an existing declaration is needed;
3414
if yes, check that the entity exists, and that it is internal,
3415
otherwise call the skipped entity or default handler.
3416
*/
3417
if (! dtd->hasParamEntityRefs || dtd->standalone) {
3418
if (! entity)
3419
return XML_ERROR_UNDEFINED_ENTITY;
3420
else if (! entity->is_internal)
3421
return XML_ERROR_ENTITY_DECLARED_IN_PE;
3422
} else if (! entity) {
3423
if (parser->m_skippedEntityHandler)
3424
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3425
else if (parser->m_defaultHandler)
3426
reportDefault(parser, enc, s, next);
3427
break;
3428
}
3429
if (entity->open)
3430
return XML_ERROR_RECURSIVE_ENTITY_REF;
3431
if (entity->notation)
3432
return XML_ERROR_BINARY_ENTITY_REF;
3433
if (entity->textPtr) {
3434
enum XML_Error result;
3435
if (! parser->m_defaultExpandInternalEntities) {
3436
if (parser->m_skippedEntityHandler)
3437
parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3438
0);
3439
else if (parser->m_defaultHandler)
3440
reportDefault(parser, enc, s, next);
3441
break;
3442
}
3443
result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3444
if (result != XML_ERROR_NONE)
3445
return result;
3446
} else if (parser->m_externalEntityRefHandler) {
3447
const XML_Char *context;
3448
entity->open = XML_TRUE;
3449
context = getContext(parser);
3450
entity->open = XML_FALSE;
3451
if (! context)
3452
return XML_ERROR_NO_MEMORY;
3453
if (! parser->m_externalEntityRefHandler(
3454
parser->m_externalEntityRefHandlerArg, context, entity->base,
3455
entity->systemId, entity->publicId))
3456
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3457
poolDiscard(&parser->m_tempPool);
3458
} else if (parser->m_defaultHandler)
3459
reportDefault(parser, enc, s, next);
3460
break;
3461
}
3462
case XML_TOK_START_TAG_NO_ATTS:
3463
/* fall through */
3464
case XML_TOK_START_TAG_WITH_ATTS: {
3465
TAG *tag;
3466
enum XML_Error result;
3467
XML_Char *toPtr;
3468
if (parser->m_freeTagList) {
3469
tag = parser->m_freeTagList;
3470
parser->m_freeTagList = parser->m_freeTagList->parent;
3471
} else {
3472
tag = MALLOC(parser, sizeof(TAG));
3473
if (! tag)
3474
return XML_ERROR_NO_MEMORY;
3475
tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE);
3476
if (! tag->buf) {
3477
FREE(parser, tag);
3478
return XML_ERROR_NO_MEMORY;
3479
}
3480
tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3481
}
3482
tag->bindings = NULL;
3483
tag->parent = parser->m_tagStack;
3484
parser->m_tagStack = tag;
3485
tag->name.localPart = NULL;
3486
tag->name.prefix = NULL;
3487
tag->rawName = s + enc->minBytesPerChar;
3488
tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3489
++parser->m_tagLevel;
3490
{
3491
const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3492
const char *fromPtr = tag->rawName;
3493
toPtr = (XML_Char *)tag->buf;
3494
for (;;) {
3495
int bufSize;
3496
int convLen;
3497
const enum XML_Convert_Result convert_res
3498
= XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3499
(ICHAR *)tag->bufEnd - 1);
3500
convLen = (int)(toPtr - (XML_Char *)tag->buf);
3501
if ((fromPtr >= rawNameEnd)
3502
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3503
tag->name.strLen = convLen;
3504
break;
3505
}
3506
bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3507
{
3508
char *temp = REALLOC(parser, tag->buf, bufSize);
3509
if (temp == NULL)
3510
return XML_ERROR_NO_MEMORY;
3511
tag->buf = temp;
3512
tag->bufEnd = temp + bufSize;
3513
toPtr = (XML_Char *)temp + convLen;
3514
}
3515
}
3516
}
3517
tag->name.str = (XML_Char *)tag->buf;
3518
*toPtr = XML_T('\0');
3519
result
3520
= storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3521
if (result)
3522
return result;
3523
if (parser->m_startElementHandler)
3524
parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3525
(const XML_Char **)parser->m_atts);
3526
else if (parser->m_defaultHandler)
3527
reportDefault(parser, enc, s, next);
3528
poolClear(&parser->m_tempPool);
3529
break;
3530
}
3531
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3532
/* fall through */
3533
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3534
const char *rawName = s + enc->minBytesPerChar;
3535
enum XML_Error result;
3536
BINDING *bindings = NULL;
3537
XML_Bool noElmHandlers = XML_TRUE;
3538
TAG_NAME name;
3539
name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3540
rawName + XmlNameLength(enc, rawName));
3541
if (! name.str)
3542
return XML_ERROR_NO_MEMORY;
3543
poolFinish(&parser->m_tempPool);
3544
result = storeAtts(parser, enc, s, &name, &bindings,
3545
XML_ACCOUNT_NONE /* token spans whole start tag */);
3546
if (result != XML_ERROR_NONE) {
3547
freeBindings(parser, bindings);
3548
return result;
3549
}
3550
poolFinish(&parser->m_tempPool);
3551
if (parser->m_startElementHandler) {
3552
parser->m_startElementHandler(parser->m_handlerArg, name.str,
3553
(const XML_Char **)parser->m_atts);
3554
noElmHandlers = XML_FALSE;
3555
}
3556
if (parser->m_endElementHandler) {
3557
if (parser->m_startElementHandler)
3558
*eventPP = *eventEndPP;
3559
parser->m_endElementHandler(parser->m_handlerArg, name.str);
3560
noElmHandlers = XML_FALSE;
3561
}
3562
if (noElmHandlers && parser->m_defaultHandler)
3563
reportDefault(parser, enc, s, next);
3564
poolClear(&parser->m_tempPool);
3565
freeBindings(parser, bindings);
3566
}
3567
if ((parser->m_tagLevel == 0)
3568
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3569
if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3570
|| (parser->m_parsingStatus.parsing == XML_PARSING
3571
&& parser->m_reenter))
3572
parser->m_processor = epilogProcessor;
3573
else
3574
return epilogProcessor(parser, next, end, nextPtr);
3575
}
3576
break;
3577
case XML_TOK_END_TAG:
3578
if (parser->m_tagLevel == startTagLevel)
3579
return XML_ERROR_ASYNC_ENTITY;
3580
else {
3581
int len;
3582
const char *rawName;
3583
TAG *tag = parser->m_tagStack;
3584
rawName = s + enc->minBytesPerChar * 2;
3585
len = XmlNameLength(enc, rawName);
3586
if (len != tag->rawNameLength
3587
|| memcmp(tag->rawName, rawName, len) != 0) {
3588
*eventPP = rawName;
3589
return XML_ERROR_TAG_MISMATCH;
3590
}
3591
parser->m_tagStack = tag->parent;
3592
tag->parent = parser->m_freeTagList;
3593
parser->m_freeTagList = tag;
3594
--parser->m_tagLevel;
3595
if (parser->m_endElementHandler) {
3596
const XML_Char *localPart;
3597
const XML_Char *prefix;
3598
XML_Char *uri;
3599
localPart = tag->name.localPart;
3600
if (parser->m_ns && localPart) {
3601
/* localPart and prefix may have been overwritten in
3602
tag->name.str, since this points to the binding->uri
3603
buffer which gets reused; so we have to add them again
3604
*/
3605
uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3606
/* don't need to check for space - already done in storeAtts() */
3607
while (*localPart)
3608
*uri++ = *localPart++;
3609
prefix = tag->name.prefix;
3610
if (parser->m_ns_triplets && prefix) {
3611
*uri++ = parser->m_namespaceSeparator;
3612
while (*prefix)
3613
*uri++ = *prefix++;
3614
}
3615
*uri = XML_T('\0');
3616
}
3617
parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3618
} else if (parser->m_defaultHandler)
3619
reportDefault(parser, enc, s, next);
3620
while (tag->bindings) {
3621
BINDING *b = tag->bindings;
3622
if (parser->m_endNamespaceDeclHandler)
3623
parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3624
b->prefix->name);
3625
tag->bindings = tag->bindings->nextTagBinding;
3626
b->nextTagBinding = parser->m_freeBindingList;
3627
parser->m_freeBindingList = b;
3628
b->prefix->binding = b->prevPrefixBinding;
3629
}
3630
if ((parser->m_tagLevel == 0)
3631
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3632
if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3633
|| (parser->m_parsingStatus.parsing == XML_PARSING
3634
&& parser->m_reenter))
3635
parser->m_processor = epilogProcessor;
3636
else
3637
return epilogProcessor(parser, next, end, nextPtr);
3638
}
3639
}
3640
break;
3641
case XML_TOK_CHAR_REF: {
3642
int n = XmlCharRefNumber(enc, s);
3643
if (n < 0)
3644
return XML_ERROR_BAD_CHAR_REF;
3645
if (parser->m_characterDataHandler) {
3646
XML_Char buf[XML_ENCODE_MAX];
3647
parser->m_characterDataHandler(parser->m_handlerArg, buf,
3648
XmlEncode(n, (ICHAR *)buf));
3649
} else if (parser->m_defaultHandler)
3650
reportDefault(parser, enc, s, next);
3651
} break;
3652
case XML_TOK_XML_DECL:
3653
return XML_ERROR_MISPLACED_XML_PI;
3654
case XML_TOK_DATA_NEWLINE:
3655
if (parser->m_characterDataHandler) {
3656
XML_Char c = 0xA;
3657
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3658
} else if (parser->m_defaultHandler)
3659
reportDefault(parser, enc, s, next);
3660
break;
3661
case XML_TOK_CDATA_SECT_OPEN: {
3662
enum XML_Error result;
3663
if (parser->m_startCdataSectionHandler)
3664
parser->m_startCdataSectionHandler(parser->m_handlerArg);
3665
/* BEGIN disabled code */
3666
/* Suppose you doing a transformation on a document that involves
3667
changing only the character data. You set up a defaultHandler
3668
and a characterDataHandler. The defaultHandler simply copies
3669
characters through. The characterDataHandler does the
3670
transformation and writes the characters out escaping them as
3671
necessary. This case will fail to work if we leave out the
3672
following two lines (because & and < inside CDATA sections will
3673
be incorrectly escaped).
3674
3675
However, now we have a start/endCdataSectionHandler, so it seems
3676
easier to let the user deal with this.
3677
*/
3678
else if ((0) && parser->m_characterDataHandler)
3679
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3680
0);
3681
/* END disabled code */
3682
else if (parser->m_defaultHandler)
3683
reportDefault(parser, enc, s, next);
3684
result
3685
= doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3686
if (result != XML_ERROR_NONE)
3687
return result;
3688
else if (! next) {
3689
parser->m_processor = cdataSectionProcessor;
3690
return result;
3691
}
3692
} break;
3693
case XML_TOK_TRAILING_RSQB:
3694
if (haveMore) {
3695
*nextPtr = s;
3696
return XML_ERROR_NONE;
3697
}
3698
if (parser->m_characterDataHandler) {
3699
if (MUST_CONVERT(enc, s)) {
3700
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3701
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3702
parser->m_characterDataHandler(
3703
parser->m_handlerArg, parser->m_dataBuf,
3704
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3705
} else
3706
parser->m_characterDataHandler(
3707
parser->m_handlerArg, (const XML_Char *)s,
3708
(int)((const XML_Char *)end - (const XML_Char *)s));
3709
} else if (parser->m_defaultHandler)
3710
reportDefault(parser, enc, s, end);
3711
/* We are at the end of the final buffer, should we check for
3712
XML_SUSPENDED, XML_FINISHED?
3713
*/
3714
if (startTagLevel == 0) {
3715
*eventPP = end;
3716
return XML_ERROR_NO_ELEMENTS;
3717
}
3718
if (parser->m_tagLevel != startTagLevel) {
3719
*eventPP = end;
3720
return XML_ERROR_ASYNC_ENTITY;
3721
}
3722
*nextPtr = end;
3723
return XML_ERROR_NONE;
3724
case XML_TOK_DATA_CHARS: {
3725
XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3726
if (charDataHandler) {
3727
if (MUST_CONVERT(enc, s)) {
3728
for (;;) {
3729
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3730
const enum XML_Convert_Result convert_res = XmlConvert(
3731
enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3732
*eventEndPP = s;
3733
charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3734
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3735
if ((convert_res == XML_CONVERT_COMPLETED)
3736
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3737
break;
3738
*eventPP = s;
3739
}
3740
} else
3741
charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3742
(int)((const XML_Char *)next - (const XML_Char *)s));
3743
} else if (parser->m_defaultHandler)
3744
reportDefault(parser, enc, s, next);
3745
} break;
3746
case XML_TOK_PI:
3747
if (! reportProcessingInstruction(parser, enc, s, next))
3748
return XML_ERROR_NO_MEMORY;
3749
break;
3750
case XML_TOK_COMMENT:
3751
if (! reportComment(parser, enc, s, next))
3752
return XML_ERROR_NO_MEMORY;
3753
break;
3754
default:
3755
/* All of the tokens produced by XmlContentTok() have their own
3756
* explicit cases, so this default is not strictly necessary.
3757
* However it is a useful safety net, so we retain the code and
3758
* simply exclude it from the coverage tests.
3759
*
3760
* LCOV_EXCL_START
3761
*/
3762
if (parser->m_defaultHandler)
3763
reportDefault(parser, enc, s, next);
3764
break;
3765
/* LCOV_EXCL_STOP */
3766
}
3767
switch (parser->m_parsingStatus.parsing) {
3768
case XML_SUSPENDED:
3769
*eventPP = next;
3770
*nextPtr = next;
3771
return XML_ERROR_NONE;
3772
case XML_FINISHED:
3773
*eventPP = next;
3774
return XML_ERROR_ABORTED;
3775
case XML_PARSING:
3776
if (parser->m_reenter) {
3777
*nextPtr = next;
3778
return XML_ERROR_NONE;
3779
}
3780
/* Fall through */
3781
default:;
3782
*eventPP = s = next;
3783
}
3784
}
3785
/* not reached */
3786
}
3787
3788
/* This function does not call free() on the allocated memory, merely
3789
* moving it to the parser's m_freeBindingList where it can be freed or
3790
* reused as appropriate.
3791
*/
3792
static void
3793
freeBindings(XML_Parser parser, BINDING *bindings) {
3794
while (bindings) {
3795
BINDING *b = bindings;
3796
3797
/* m_startNamespaceDeclHandler will have been called for this
3798
* binding in addBindings(), so call the end handler now.
3799
*/
3800
if (parser->m_endNamespaceDeclHandler)
3801
parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3802
3803
bindings = bindings->nextTagBinding;
3804
b->nextTagBinding = parser->m_freeBindingList;
3805
parser->m_freeBindingList = b;
3806
b->prefix->binding = b->prevPrefixBinding;
3807
}
3808
}
3809
3810
/* Precondition: all arguments must be non-NULL;
3811
Purpose:
3812
- normalize attributes
3813
- check attributes for well-formedness
3814
- generate namespace aware attribute names (URI, prefix)
3815
- build list of attributes for startElementHandler
3816
- default attributes
3817
- process namespace declarations (check and report them)
3818
- generate namespace aware element name (URI, prefix)
3819
*/
3820
static enum XML_Error
3821
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3822
TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3823
enum XML_Account account) {
3824
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3825
ELEMENT_TYPE *elementType;
3826
int nDefaultAtts;
3827
const XML_Char **appAtts; /* the attribute list for the application */
3828
int attIndex = 0;
3829
int prefixLen;
3830
int i;
3831
int n;
3832
XML_Char *uri;
3833
int nPrefixes = 0;
3834
BINDING *binding;
3835
const XML_Char *localPart;
3836
3837
/* lookup the element type name */
3838
elementType
3839
= (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3840
if (! elementType) {
3841
const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3842
if (! name)
3843
return XML_ERROR_NO_MEMORY;
3844
elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3845
sizeof(ELEMENT_TYPE));
3846
if (! elementType)
3847
return XML_ERROR_NO_MEMORY;
3848
if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3849
return XML_ERROR_NO_MEMORY;
3850
}
3851
nDefaultAtts = elementType->nDefaultAtts;
3852
3853
/* get the attributes from the tokenizer */
3854
n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3855
3856
/* Detect and prevent integer overflow */
3857
if (n > INT_MAX - nDefaultAtts) {
3858
return XML_ERROR_NO_MEMORY;
3859
}
3860
3861
if (n + nDefaultAtts > parser->m_attsSize) {
3862
int oldAttsSize = parser->m_attsSize;
3863
ATTRIBUTE *temp;
3864
#ifdef XML_ATTR_INFO
3865
XML_AttrInfo *temp2;
3866
#endif
3867
3868
/* Detect and prevent integer overflow */
3869
if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3870
|| (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3871
return XML_ERROR_NO_MEMORY;
3872
}
3873
3874
parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3875
3876
/* Detect and prevent integer overflow.
3877
* The preprocessor guard addresses the "always false" warning
3878
* from -Wtype-limits on platforms where
3879
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3880
#if UINT_MAX >= SIZE_MAX
3881
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3882
parser->m_attsSize = oldAttsSize;
3883
return XML_ERROR_NO_MEMORY;
3884
}
3885
#endif
3886
3887
temp = REALLOC(parser, parser->m_atts,
3888
parser->m_attsSize * sizeof(ATTRIBUTE));
3889
if (temp == NULL) {
3890
parser->m_attsSize = oldAttsSize;
3891
return XML_ERROR_NO_MEMORY;
3892
}
3893
parser->m_atts = temp;
3894
#ifdef XML_ATTR_INFO
3895
/* Detect and prevent integer overflow.
3896
* The preprocessor guard addresses the "always false" warning
3897
* from -Wtype-limits on platforms where
3898
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3899
# if UINT_MAX >= SIZE_MAX
3900
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3901
parser->m_attsSize = oldAttsSize;
3902
return XML_ERROR_NO_MEMORY;
3903
}
3904
# endif
3905
3906
temp2 = REALLOC(parser, parser->m_attInfo,
3907
parser->m_attsSize * sizeof(XML_AttrInfo));
3908
if (temp2 == NULL) {
3909
parser->m_attsSize = oldAttsSize;
3910
return XML_ERROR_NO_MEMORY;
3911
}
3912
parser->m_attInfo = temp2;
3913
#endif
3914
if (n > oldAttsSize)
3915
XmlGetAttributes(enc, attStr, n, parser->m_atts);
3916
}
3917
3918
appAtts = (const XML_Char **)parser->m_atts;
3919
for (i = 0; i < n; i++) {
3920
ATTRIBUTE *currAtt = &parser->m_atts[i];
3921
#ifdef XML_ATTR_INFO
3922
XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3923
#endif
3924
/* add the name and value to the attribute list */
3925
ATTRIBUTE_ID *attId
3926
= getAttributeId(parser, enc, currAtt->name,
3927
currAtt->name + XmlNameLength(enc, currAtt->name));
3928
if (! attId)
3929
return XML_ERROR_NO_MEMORY;
3930
#ifdef XML_ATTR_INFO
3931
currAttInfo->nameStart
3932
= parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3933
currAttInfo->nameEnd
3934
= currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3935
currAttInfo->valueStart = parser->m_parseEndByteIndex
3936
- (parser->m_parseEndPtr - currAtt->valuePtr);
3937
currAttInfo->valueEnd = parser->m_parseEndByteIndex
3938
- (parser->m_parseEndPtr - currAtt->valueEnd);
3939
#endif
3940
/* Detect duplicate attributes by their QNames. This does not work when
3941
namespace processing is turned on and different prefixes for the same
3942
namespace are used. For this case we have a check further down.
3943
*/
3944
if ((attId->name)[-1]) {
3945
if (enc == parser->m_encoding)
3946
parser->m_eventPtr = parser->m_atts[i].name;
3947
return XML_ERROR_DUPLICATE_ATTRIBUTE;
3948
}
3949
(attId->name)[-1] = 1;
3950
appAtts[attIndex++] = attId->name;
3951
if (! parser->m_atts[i].normalized) {
3952
enum XML_Error result;
3953
XML_Bool isCdata = XML_TRUE;
3954
3955
/* figure out whether declared as other than CDATA */
3956
if (attId->maybeTokenized) {
3957
int j;
3958
for (j = 0; j < nDefaultAtts; j++) {
3959
if (attId == elementType->defaultAtts[j].id) {
3960
isCdata = elementType->defaultAtts[j].isCdata;
3961
break;
3962
}
3963
}
3964
}
3965
3966
/* normalize the attribute value */
3967
result = storeAttributeValue(
3968
parser, enc, isCdata, parser->m_atts[i].valuePtr,
3969
parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3970
if (result)
3971
return result;
3972
appAtts[attIndex] = poolStart(&parser->m_tempPool);
3973
poolFinish(&parser->m_tempPool);
3974
} else {
3975
/* the value did not need normalizing */
3976
appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3977
parser->m_atts[i].valuePtr,
3978
parser->m_atts[i].valueEnd);
3979
if (appAtts[attIndex] == 0)
3980
return XML_ERROR_NO_MEMORY;
3981
poolFinish(&parser->m_tempPool);
3982
}
3983
/* handle prefixed attribute names */
3984
if (attId->prefix) {
3985
if (attId->xmlns) {
3986
/* deal with namespace declarations here */
3987
enum XML_Error result = addBinding(parser, attId->prefix, attId,
3988
appAtts[attIndex], bindingsPtr);
3989
if (result)
3990
return result;
3991
--attIndex;
3992
} else {
3993
/* deal with other prefixed names later */
3994
attIndex++;
3995
nPrefixes++;
3996
(attId->name)[-1] = 2;
3997
}
3998
} else
3999
attIndex++;
4000
}
4001
4002
/* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
4003
parser->m_nSpecifiedAtts = attIndex;
4004
if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
4005
for (i = 0; i < attIndex; i += 2)
4006
if (appAtts[i] == elementType->idAtt->name) {
4007
parser->m_idAttIndex = i;
4008
break;
4009
}
4010
} else
4011
parser->m_idAttIndex = -1;
4012
4013
/* do attribute defaulting */
4014
for (i = 0; i < nDefaultAtts; i++) {
4015
const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
4016
if (! (da->id->name)[-1] && da->value) {
4017
if (da->id->prefix) {
4018
if (da->id->xmlns) {
4019
enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
4020
da->value, bindingsPtr);
4021
if (result)
4022
return result;
4023
} else {
4024
(da->id->name)[-1] = 2;
4025
nPrefixes++;
4026
appAtts[attIndex++] = da->id->name;
4027
appAtts[attIndex++] = da->value;
4028
}
4029
} else {
4030
(da->id->name)[-1] = 1;
4031
appAtts[attIndex++] = da->id->name;
4032
appAtts[attIndex++] = da->value;
4033
}
4034
}
4035
}
4036
appAtts[attIndex] = 0;
4037
4038
/* expand prefixed attribute names, check for duplicates,
4039
and clear flags that say whether attributes were specified */
4040
i = 0;
4041
if (nPrefixes) {
4042
unsigned int j; /* hash table index */
4043
unsigned long version = parser->m_nsAttsVersion;
4044
4045
/* Detect and prevent invalid shift */
4046
if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
4047
return XML_ERROR_NO_MEMORY;
4048
}
4049
4050
unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
4051
unsigned char oldNsAttsPower = parser->m_nsAttsPower;
4052
/* size of hash table must be at least 2 * (# of prefixed attributes) */
4053
if ((nPrefixes << 1)
4054
>> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
4055
NS_ATT *temp;
4056
/* hash table size must also be a power of 2 and >= 8 */
4057
while (nPrefixes >> parser->m_nsAttsPower++)
4058
;
4059
if (parser->m_nsAttsPower < 3)
4060
parser->m_nsAttsPower = 3;
4061
4062
/* Detect and prevent invalid shift */
4063
if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
4064
/* Restore actual size of memory in m_nsAtts */
4065
parser->m_nsAttsPower = oldNsAttsPower;
4066
return XML_ERROR_NO_MEMORY;
4067
}
4068
4069
nsAttsSize = 1u << parser->m_nsAttsPower;
4070
4071
/* Detect and prevent integer overflow.
4072
* The preprocessor guard addresses the "always false" warning
4073
* from -Wtype-limits on platforms where
4074
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4075
#if UINT_MAX >= SIZE_MAX
4076
if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
4077
/* Restore actual size of memory in m_nsAtts */
4078
parser->m_nsAttsPower = oldNsAttsPower;
4079
return XML_ERROR_NO_MEMORY;
4080
}
4081
#endif
4082
4083
temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
4084
if (! temp) {
4085
/* Restore actual size of memory in m_nsAtts */
4086
parser->m_nsAttsPower = oldNsAttsPower;
4087
return XML_ERROR_NO_MEMORY;
4088
}
4089
parser->m_nsAtts = temp;
4090
version = 0; /* force re-initialization of m_nsAtts hash table */
4091
}
4092
/* using a version flag saves us from initializing m_nsAtts every time */
4093
if (! version) { /* initialize version flags when version wraps around */
4094
version = INIT_ATTS_VERSION;
4095
for (j = nsAttsSize; j != 0;)
4096
parser->m_nsAtts[--j].version = version;
4097
}
4098
parser->m_nsAttsVersion = --version;
4099
4100
/* expand prefixed names and check for duplicates */
4101
for (; i < attIndex; i += 2) {
4102
const XML_Char *s = appAtts[i];
4103
if (s[-1] == 2) { /* prefixed */
4104
ATTRIBUTE_ID *id;
4105
const BINDING *b;
4106
unsigned long uriHash;
4107
struct siphash sip_state;
4108
struct sipkey sip_key;
4109
4110
copy_salt_to_sipkey(parser, &sip_key);
4111
sip24_init(&sip_state, &sip_key);
4112
4113
((XML_Char *)s)[-1] = 0; /* clear flag */
4114
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
4115
if (! id || ! id->prefix) {
4116
/* This code is walking through the appAtts array, dealing
4117
* with (in this case) a prefixed attribute name. To be in
4118
* the array, the attribute must have already been bound, so
4119
* has to have passed through the hash table lookup once
4120
* already. That implies that an entry for it already
4121
* exists, so the lookup above will return a pointer to
4122
* already allocated memory. There is no opportunaity for
4123
* the allocator to fail, so the condition above cannot be
4124
* fulfilled.
4125
*
4126
* Since it is difficult to be certain that the above
4127
* analysis is complete, we retain the test and merely
4128
* remove the code from coverage tests.
4129
*/
4130
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4131
}
4132
b = id->prefix->binding;
4133
if (! b)
4134
return XML_ERROR_UNBOUND_PREFIX;
4135
4136
for (j = 0; j < (unsigned int)b->uriLen; j++) {
4137
const XML_Char c = b->uri[j];
4138
if (! poolAppendChar(&parser->m_tempPool, c))
4139
return XML_ERROR_NO_MEMORY;
4140
}
4141
4142
sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
4143
4144
while (*s++ != XML_T(ASCII_COLON))
4145
;
4146
4147
sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
4148
4149
do { /* copies null terminator */
4150
if (! poolAppendChar(&parser->m_tempPool, *s))
4151
return XML_ERROR_NO_MEMORY;
4152
} while (*s++);
4153
4154
uriHash = (unsigned long)sip24_final(&sip_state);
4155
4156
{ /* Check hash table for duplicate of expanded name (uriName).
4157
Derived from code in lookup(parser, HASH_TABLE *table, ...).
4158
*/
4159
unsigned char step = 0;
4160
unsigned long mask = nsAttsSize - 1;
4161
j = uriHash & mask; /* index into hash table */
4162
while (parser->m_nsAtts[j].version == version) {
4163
/* for speed we compare stored hash values first */
4164
if (uriHash == parser->m_nsAtts[j].hash) {
4165
const XML_Char *s1 = poolStart(&parser->m_tempPool);
4166
const XML_Char *s2 = parser->m_nsAtts[j].uriName;
4167
/* s1 is null terminated, but not s2 */
4168
for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
4169
;
4170
if (*s1 == 0)
4171
return XML_ERROR_DUPLICATE_ATTRIBUTE;
4172
}
4173
if (! step)
4174
step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
4175
j < step ? (j += nsAttsSize - step) : (j -= step);
4176
}
4177
}
4178
4179
if (parser->m_ns_triplets) { /* append namespace separator and prefix */
4180
parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
4181
s = b->prefix->name;
4182
do {
4183
if (! poolAppendChar(&parser->m_tempPool, *s))
4184
return XML_ERROR_NO_MEMORY;
4185
} while (*s++);
4186
}
4187
4188
/* store expanded name in attribute list */
4189
s = poolStart(&parser->m_tempPool);
4190
poolFinish(&parser->m_tempPool);
4191
appAtts[i] = s;
4192
4193
/* fill empty slot with new version, uriName and hash value */
4194
parser->m_nsAtts[j].version = version;
4195
parser->m_nsAtts[j].hash = uriHash;
4196
parser->m_nsAtts[j].uriName = s;
4197
4198
if (! --nPrefixes) {
4199
i += 2;
4200
break;
4201
}
4202
} else /* not prefixed */
4203
((XML_Char *)s)[-1] = 0; /* clear flag */
4204
}
4205
}
4206
/* clear flags for the remaining attributes */
4207
for (; i < attIndex; i += 2)
4208
((XML_Char *)(appAtts[i]))[-1] = 0;
4209
for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
4210
binding->attId->name[-1] = 0;
4211
4212
if (! parser->m_ns)
4213
return XML_ERROR_NONE;
4214
4215
/* expand the element type name */
4216
if (elementType->prefix) {
4217
binding = elementType->prefix->binding;
4218
if (! binding)
4219
return XML_ERROR_UNBOUND_PREFIX;
4220
localPart = tagNamePtr->str;
4221
while (*localPart++ != XML_T(ASCII_COLON))
4222
;
4223
} else if (dtd->defaultPrefix.binding) {
4224
binding = dtd->defaultPrefix.binding;
4225
localPart = tagNamePtr->str;
4226
} else
4227
return XML_ERROR_NONE;
4228
prefixLen = 0;
4229
if (parser->m_ns_triplets && binding->prefix->name) {
4230
while (binding->prefix->name[prefixLen++])
4231
; /* prefixLen includes null terminator */
4232
}
4233
tagNamePtr->localPart = localPart;
4234
tagNamePtr->uriLen = binding->uriLen;
4235
tagNamePtr->prefix = binding->prefix->name;
4236
tagNamePtr->prefixLen = prefixLen;
4237
for (i = 0; localPart[i++];)
4238
; /* i includes null terminator */
4239
4240
/* Detect and prevent integer overflow */
4241
if (binding->uriLen > INT_MAX - prefixLen
4242
|| i > INT_MAX - (binding->uriLen + prefixLen)) {
4243
return XML_ERROR_NO_MEMORY;
4244
}
4245
4246
n = i + binding->uriLen + prefixLen;
4247
if (n > binding->uriAlloc) {
4248
TAG *p;
4249
4250
/* Detect and prevent integer overflow */
4251
if (n > INT_MAX - EXPAND_SPARE) {
4252
return XML_ERROR_NO_MEMORY;
4253
}
4254
/* Detect and prevent integer overflow.
4255
* The preprocessor guard addresses the "always false" warning
4256
* from -Wtype-limits on platforms where
4257
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4258
#if UINT_MAX >= SIZE_MAX
4259
if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4260
return XML_ERROR_NO_MEMORY;
4261
}
4262
#endif
4263
4264
uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
4265
if (! uri)
4266
return XML_ERROR_NO_MEMORY;
4267
binding->uriAlloc = n + EXPAND_SPARE;
4268
memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
4269
for (p = parser->m_tagStack; p; p = p->parent)
4270
if (p->name.str == binding->uri)
4271
p->name.str = uri;
4272
FREE(parser, binding->uri);
4273
binding->uri = uri;
4274
}
4275
/* if m_namespaceSeparator != '\0' then uri includes it already */
4276
uri = binding->uri + binding->uriLen;
4277
memcpy(uri, localPart, i * sizeof(XML_Char));
4278
/* we always have a namespace separator between localPart and prefix */
4279
if (prefixLen) {
4280
uri += i - 1;
4281
*uri = parser->m_namespaceSeparator; /* replace null terminator */
4282
memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
4283
}
4284
tagNamePtr->str = binding->uri;
4285
return XML_ERROR_NONE;
4286
}
4287
4288
static XML_Bool
4289
is_rfc3986_uri_char(XML_Char candidate) {
4290
// For the RFC 3986 ANBF grammar see
4291
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4292
4293
switch (candidate) {
4294
// From rule "ALPHA" (uppercase half)
4295
case 'A':
4296
case 'B':
4297
case 'C':
4298
case 'D':
4299
case 'E':
4300
case 'F':
4301
case 'G':
4302
case 'H':
4303
case 'I':
4304
case 'J':
4305
case 'K':
4306
case 'L':
4307
case 'M':
4308
case 'N':
4309
case 'O':
4310
case 'P':
4311
case 'Q':
4312
case 'R':
4313
case 'S':
4314
case 'T':
4315
case 'U':
4316
case 'V':
4317
case 'W':
4318
case 'X':
4319
case 'Y':
4320
case 'Z':
4321
4322
// From rule "ALPHA" (lowercase half)
4323
case 'a':
4324
case 'b':
4325
case 'c':
4326
case 'd':
4327
case 'e':
4328
case 'f':
4329
case 'g':
4330
case 'h':
4331
case 'i':
4332
case 'j':
4333
case 'k':
4334
case 'l':
4335
case 'm':
4336
case 'n':
4337
case 'o':
4338
case 'p':
4339
case 'q':
4340
case 'r':
4341
case 's':
4342
case 't':
4343
case 'u':
4344
case 'v':
4345
case 'w':
4346
case 'x':
4347
case 'y':
4348
case 'z':
4349
4350
// From rule "DIGIT"
4351
case '0':
4352
case '1':
4353
case '2':
4354
case '3':
4355
case '4':
4356
case '5':
4357
case '6':
4358
case '7':
4359
case '8':
4360
case '9':
4361
4362
// From rule "pct-encoded"
4363
case '%':
4364
4365
// From rule "unreserved"
4366
case '-':
4367
case '.':
4368
case '_':
4369
case '~':
4370
4371
// From rule "gen-delims"
4372
case ':':
4373
case '/':
4374
case '?':
4375
case '#':
4376
case '[':
4377
case ']':
4378
case '@':
4379
4380
// From rule "sub-delims"
4381
case '!':
4382
case '$':
4383
case '&':
4384
case '\'':
4385
case '(':
4386
case ')':
4387
case '*':
4388
case '+':
4389
case ',':
4390
case ';':
4391
case '=':
4392
return XML_TRUE;
4393
4394
default:
4395
return XML_FALSE;
4396
}
4397
}
4398
4399
/* addBinding() overwrites the value of prefix->binding without checking.
4400
Therefore one must keep track of the old value outside of addBinding().
4401
*/
4402
static enum XML_Error
4403
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4404
const XML_Char *uri, BINDING **bindingsPtr) {
4405
// "http://www.w3.org/XML/1998/namespace"
4406
static const XML_Char xmlNamespace[]
4407
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4408
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4409
ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4410
ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4411
ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4412
ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4413
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4414
ASCII_e, '\0'};
4415
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4416
// "http://www.w3.org/2000/xmlns/"
4417
static const XML_Char xmlnsNamespace[]
4418
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4419
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4420
ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4421
ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4422
ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4423
static const int xmlnsLen
4424
= (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4425
4426
XML_Bool mustBeXML = XML_FALSE;
4427
XML_Bool isXML = XML_TRUE;
4428
XML_Bool isXMLNS = XML_TRUE;
4429
4430
BINDING *b;
4431
int len;
4432
4433
/* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4434
if (*uri == XML_T('\0') && prefix->name)
4435
return XML_ERROR_UNDECLARING_PREFIX;
4436
4437
if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4438
&& prefix->name[1] == XML_T(ASCII_m)
4439
&& prefix->name[2] == XML_T(ASCII_l)) {
4440
/* Not allowed to bind xmlns */
4441
if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4442
&& prefix->name[5] == XML_T('\0'))
4443
return XML_ERROR_RESERVED_PREFIX_XMLNS;
4444
4445
if (prefix->name[3] == XML_T('\0'))
4446
mustBeXML = XML_TRUE;
4447
}
4448
4449
for (len = 0; uri[len]; len++) {
4450
if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4451
isXML = XML_FALSE;
4452
4453
if (! mustBeXML && isXMLNS
4454
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4455
isXMLNS = XML_FALSE;
4456
4457
// NOTE: While Expat does not validate namespace URIs against RFC 3986
4458
// today (and is not REQUIRED to do so with regard to the XML 1.0
4459
// namespaces specification) we have to at least make sure, that
4460
// the application on top of Expat (that is likely splitting expanded
4461
// element names ("qualified names") of form
4462
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4463
// in its element handler code) cannot be confused by an attacker
4464
// putting additional namespace separator characters into namespace
4465
// declarations. That would be ambiguous and not to be expected.
4466
//
4467
// While the HTML API docs of function XML_ParserCreateNS have been
4468
// advising against use of a namespace separator character that can
4469
// appear in a URI for >20 years now, some widespread applications
4470
// are using URI characters (':' (colon) in particular) for a
4471
// namespace separator, in practice. To keep these applications
4472
// functional, we only reject namespaces URIs containing the
4473
// application-chosen namespace separator if the chosen separator
4474
// is a non-URI character with regard to RFC 3986.
4475
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4476
&& ! is_rfc3986_uri_char(uri[len])) {
4477
return XML_ERROR_SYNTAX;
4478
}
4479
}
4480
isXML = isXML && len == xmlLen;
4481
isXMLNS = isXMLNS && len == xmlnsLen;
4482
4483
if (mustBeXML != isXML)
4484
return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4485
: XML_ERROR_RESERVED_NAMESPACE_URI;
4486
4487
if (isXMLNS)
4488
return XML_ERROR_RESERVED_NAMESPACE_URI;
4489
4490
if (parser->m_namespaceSeparator)
4491
len++;
4492
if (parser->m_freeBindingList) {
4493
b = parser->m_freeBindingList;
4494
if (len > b->uriAlloc) {
4495
/* Detect and prevent integer overflow */
4496
if (len > INT_MAX - EXPAND_SPARE) {
4497
return XML_ERROR_NO_MEMORY;
4498
}
4499
4500
/* Detect and prevent integer overflow.
4501
* The preprocessor guard addresses the "always false" warning
4502
* from -Wtype-limits on platforms where
4503
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4504
#if UINT_MAX >= SIZE_MAX
4505
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4506
return XML_ERROR_NO_MEMORY;
4507
}
4508
#endif
4509
4510
XML_Char *temp
4511
= REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4512
if (temp == NULL)
4513
return XML_ERROR_NO_MEMORY;
4514
b->uri = temp;
4515
b->uriAlloc = len + EXPAND_SPARE;
4516
}
4517
parser->m_freeBindingList = b->nextTagBinding;
4518
} else {
4519
b = MALLOC(parser, sizeof(BINDING));
4520
if (! b)
4521
return XML_ERROR_NO_MEMORY;
4522
4523
/* Detect and prevent integer overflow */
4524
if (len > INT_MAX - EXPAND_SPARE) {
4525
return XML_ERROR_NO_MEMORY;
4526
}
4527
/* Detect and prevent integer overflow.
4528
* The preprocessor guard addresses the "always false" warning
4529
* from -Wtype-limits on platforms where
4530
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4531
#if UINT_MAX >= SIZE_MAX
4532
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4533
return XML_ERROR_NO_MEMORY;
4534
}
4535
#endif
4536
4537
b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4538
if (! b->uri) {
4539
FREE(parser, b);
4540
return XML_ERROR_NO_MEMORY;
4541
}
4542
b->uriAlloc = len + EXPAND_SPARE;
4543
}
4544
b->uriLen = len;
4545
memcpy(b->uri, uri, len * sizeof(XML_Char));
4546
if (parser->m_namespaceSeparator)
4547
b->uri[len - 1] = parser->m_namespaceSeparator;
4548
b->prefix = prefix;
4549
b->attId = attId;
4550
b->prevPrefixBinding = prefix->binding;
4551
/* NULL binding when default namespace undeclared */
4552
if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4553
prefix->binding = NULL;
4554
else
4555
prefix->binding = b;
4556
b->nextTagBinding = *bindingsPtr;
4557
*bindingsPtr = b;
4558
/* if attId == NULL then we are not starting a namespace scope */
4559
if (attId && parser->m_startNamespaceDeclHandler)
4560
parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4561
prefix->binding ? uri : 0);
4562
return XML_ERROR_NONE;
4563
}
4564
4565
/* The idea here is to avoid using stack for each CDATA section when
4566
the whole file is parsed with one call.
4567
*/
4568
static enum XML_Error PTRCALL
4569
cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4570
const char **endPtr) {
4571
enum XML_Error result = doCdataSection(
4572
parser, parser->m_encoding, &start, end, endPtr,
4573
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4574
if (result != XML_ERROR_NONE)
4575
return result;
4576
if (start) {
4577
if (parser->m_parentParser) { /* we are parsing an external entity */
4578
parser->m_processor = externalEntityContentProcessor;
4579
return externalEntityContentProcessor(parser, start, end, endPtr);
4580
} else {
4581
parser->m_processor = contentProcessor;
4582
return contentProcessor(parser, start, end, endPtr);
4583
}
4584
}
4585
return result;
4586
}
4587
4588
/* startPtr gets set to non-null if the section is closed, and to null if
4589
the section is not yet closed.
4590
*/
4591
static enum XML_Error
4592
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4593
const char *end, const char **nextPtr, XML_Bool haveMore,
4594
enum XML_Account account) {
4595
const char *s = *startPtr;
4596
const char **eventPP;
4597
const char **eventEndPP;
4598
if (enc == parser->m_encoding) {
4599
eventPP = &parser->m_eventPtr;
4600
*eventPP = s;
4601
eventEndPP = &parser->m_eventEndPtr;
4602
} else {
4603
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4604
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4605
}
4606
*eventPP = s;
4607
*startPtr = NULL;
4608
4609
for (;;) {
4610
const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4611
int tok = XmlCdataSectionTok(enc, s, end, &next);
4612
#if XML_GE == 1
4613
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4614
accountingOnAbort(parser);
4615
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4616
}
4617
#else
4618
UNUSED_P(account);
4619
#endif
4620
*eventEndPP = next;
4621
switch (tok) {
4622
case XML_TOK_CDATA_SECT_CLOSE:
4623
if (parser->m_endCdataSectionHandler)
4624
parser->m_endCdataSectionHandler(parser->m_handlerArg);
4625
/* BEGIN disabled code */
4626
/* see comment under XML_TOK_CDATA_SECT_OPEN */
4627
else if ((0) && parser->m_characterDataHandler)
4628
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4629
0);
4630
/* END disabled code */
4631
else if (parser->m_defaultHandler)
4632
reportDefault(parser, enc, s, next);
4633
*startPtr = next;
4634
*nextPtr = next;
4635
if (parser->m_parsingStatus.parsing == XML_FINISHED)
4636
return XML_ERROR_ABORTED;
4637
else
4638
return XML_ERROR_NONE;
4639
case XML_TOK_DATA_NEWLINE:
4640
if (parser->m_characterDataHandler) {
4641
XML_Char c = 0xA;
4642
parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4643
} else if (parser->m_defaultHandler)
4644
reportDefault(parser, enc, s, next);
4645
break;
4646
case XML_TOK_DATA_CHARS: {
4647
XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4648
if (charDataHandler) {
4649
if (MUST_CONVERT(enc, s)) {
4650
for (;;) {
4651
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4652
const enum XML_Convert_Result convert_res = XmlConvert(
4653
enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4654
*eventEndPP = next;
4655
charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4656
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4657
if ((convert_res == XML_CONVERT_COMPLETED)
4658
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4659
break;
4660
*eventPP = s;
4661
}
4662
} else
4663
charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4664
(int)((const XML_Char *)next - (const XML_Char *)s));
4665
} else if (parser->m_defaultHandler)
4666
reportDefault(parser, enc, s, next);
4667
} break;
4668
case XML_TOK_INVALID:
4669
*eventPP = next;
4670
return XML_ERROR_INVALID_TOKEN;
4671
case XML_TOK_PARTIAL_CHAR:
4672
if (haveMore) {
4673
*nextPtr = s;
4674
return XML_ERROR_NONE;
4675
}
4676
return XML_ERROR_PARTIAL_CHAR;
4677
case XML_TOK_PARTIAL:
4678
case XML_TOK_NONE:
4679
if (haveMore) {
4680
*nextPtr = s;
4681
return XML_ERROR_NONE;
4682
}
4683
return XML_ERROR_UNCLOSED_CDATA_SECTION;
4684
default:
4685
/* Every token returned by XmlCdataSectionTok() has its own
4686
* explicit case, so this default case will never be executed.
4687
* We retain it as a safety net and exclude it from the coverage
4688
* statistics.
4689
*
4690
* LCOV_EXCL_START
4691
*/
4692
*eventPP = next;
4693
return XML_ERROR_UNEXPECTED_STATE;
4694
/* LCOV_EXCL_STOP */
4695
}
4696
4697
switch (parser->m_parsingStatus.parsing) {
4698
case XML_SUSPENDED:
4699
*eventPP = next;
4700
*nextPtr = next;
4701
return XML_ERROR_NONE;
4702
case XML_FINISHED:
4703
*eventPP = next;
4704
return XML_ERROR_ABORTED;
4705
case XML_PARSING:
4706
if (parser->m_reenter) {
4707
return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
4708
}
4709
/* Fall through */
4710
default:;
4711
*eventPP = s = next;
4712
}
4713
}
4714
/* not reached */
4715
}
4716
4717
#ifdef XML_DTD
4718
4719
/* The idea here is to avoid using stack for each IGNORE section when
4720
the whole file is parsed with one call.
4721
*/
4722
static enum XML_Error PTRCALL
4723
ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4724
const char **endPtr) {
4725
enum XML_Error result
4726
= doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4727
(XML_Bool)! parser->m_parsingStatus.finalBuffer);
4728
if (result != XML_ERROR_NONE)
4729
return result;
4730
if (start) {
4731
parser->m_processor = prologProcessor;
4732
return prologProcessor(parser, start, end, endPtr);
4733
}
4734
return result;
4735
}
4736
4737
/* startPtr gets set to non-null is the section is closed, and to null
4738
if the section is not yet closed.
4739
*/
4740
static enum XML_Error
4741
doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4742
const char *end, const char **nextPtr, XML_Bool haveMore) {
4743
const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4744
int tok;
4745
const char *s = *startPtr;
4746
const char **eventPP;
4747
const char **eventEndPP;
4748
if (enc == parser->m_encoding) {
4749
eventPP = &parser->m_eventPtr;
4750
*eventPP = s;
4751
eventEndPP = &parser->m_eventEndPtr;
4752
} else {
4753
/* It's not entirely clear, but it seems the following two lines
4754
* of code cannot be executed. The only occasions on which 'enc'
4755
* is not 'encoding' are when this function is called
4756
* from the internal entity processing, and IGNORE sections are an
4757
* error in internal entities.
4758
*
4759
* Since it really isn't clear that this is true, we keep the code
4760
* and just remove it from our coverage tests.
4761
*
4762
* LCOV_EXCL_START
4763
*/
4764
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4765
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4766
/* LCOV_EXCL_STOP */
4767
}
4768
*eventPP = s;
4769
*startPtr = NULL;
4770
tok = XmlIgnoreSectionTok(enc, s, end, &next);
4771
# if XML_GE == 1
4772
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4773
XML_ACCOUNT_DIRECT)) {
4774
accountingOnAbort(parser);
4775
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4776
}
4777
# endif
4778
*eventEndPP = next;
4779
switch (tok) {
4780
case XML_TOK_IGNORE_SECT:
4781
if (parser->m_defaultHandler)
4782
reportDefault(parser, enc, s, next);
4783
*startPtr = next;
4784
*nextPtr = next;
4785
if (parser->m_parsingStatus.parsing == XML_FINISHED)
4786
return XML_ERROR_ABORTED;
4787
else
4788
return XML_ERROR_NONE;
4789
case XML_TOK_INVALID:
4790
*eventPP = next;
4791
return XML_ERROR_INVALID_TOKEN;
4792
case XML_TOK_PARTIAL_CHAR:
4793
if (haveMore) {
4794
*nextPtr = s;
4795
return XML_ERROR_NONE;
4796
}
4797
return XML_ERROR_PARTIAL_CHAR;
4798
case XML_TOK_PARTIAL:
4799
case XML_TOK_NONE:
4800
if (haveMore) {
4801
*nextPtr = s;
4802
return XML_ERROR_NONE;
4803
}
4804
return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4805
default:
4806
/* All of the tokens that XmlIgnoreSectionTok() returns have
4807
* explicit cases to handle them, so this default case is never
4808
* executed. We keep it as a safety net anyway, and remove it
4809
* from our test coverage statistics.
4810
*
4811
* LCOV_EXCL_START
4812
*/
4813
*eventPP = next;
4814
return XML_ERROR_UNEXPECTED_STATE;
4815
/* LCOV_EXCL_STOP */
4816
}
4817
/* not reached */
4818
}
4819
4820
#endif /* XML_DTD */
4821
4822
static enum XML_Error
4823
initializeEncoding(XML_Parser parser) {
4824
const char *s;
4825
#ifdef XML_UNICODE
4826
char encodingBuf[128];
4827
/* See comments about `protocolEncodingName` in parserInit() */
4828
if (! parser->m_protocolEncodingName)
4829
s = NULL;
4830
else {
4831
int i;
4832
for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4833
if (i == sizeof(encodingBuf) - 1
4834
|| (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4835
encodingBuf[0] = '\0';
4836
break;
4837
}
4838
encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4839
}
4840
encodingBuf[i] = '\0';
4841
s = encodingBuf;
4842
}
4843
#else
4844
s = parser->m_protocolEncodingName;
4845
#endif
4846
if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4847
&parser->m_initEncoding, &parser->m_encoding, s))
4848
return XML_ERROR_NONE;
4849
return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4850
}
4851
4852
static enum XML_Error
4853
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4854
const char *next) {
4855
const char *encodingName = NULL;
4856
const XML_Char *storedEncName = NULL;
4857
const ENCODING *newEncoding = NULL;
4858
const char *version = NULL;
4859
const char *versionend = NULL;
4860
const XML_Char *storedversion = NULL;
4861
int standalone = -1;
4862
4863
#if XML_GE == 1
4864
if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4865
XML_ACCOUNT_DIRECT)) {
4866
accountingOnAbort(parser);
4867
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4868
}
4869
#endif
4870
4871
if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4872
isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4873
&version, &versionend, &encodingName, &newEncoding, &standalone)) {
4874
if (isGeneralTextEntity)
4875
return XML_ERROR_TEXT_DECL;
4876
else
4877
return XML_ERROR_XML_DECL;
4878
}
4879
if (! isGeneralTextEntity && standalone == 1) {
4880
parser->m_dtd->standalone = XML_TRUE;
4881
#ifdef XML_DTD
4882
if (parser->m_paramEntityParsing
4883
== XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4884
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4885
#endif /* XML_DTD */
4886
}
4887
if (parser->m_xmlDeclHandler) {
4888
if (encodingName != NULL) {
4889
storedEncName = poolStoreString(
4890
&parser->m_temp2Pool, parser->m_encoding, encodingName,
4891
encodingName + XmlNameLength(parser->m_encoding, encodingName));
4892
if (! storedEncName)
4893
return XML_ERROR_NO_MEMORY;
4894
poolFinish(&parser->m_temp2Pool);
4895
}
4896
if (version) {
4897
storedversion
4898
= poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4899
versionend - parser->m_encoding->minBytesPerChar);
4900
if (! storedversion)
4901
return XML_ERROR_NO_MEMORY;
4902
}
4903
parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4904
standalone);
4905
} else if (parser->m_defaultHandler)
4906
reportDefault(parser, parser->m_encoding, s, next);
4907
if (parser->m_protocolEncodingName == NULL) {
4908
if (newEncoding) {
4909
/* Check that the specified encoding does not conflict with what
4910
* the parser has already deduced. Do we have the same number
4911
* of bytes in the smallest representation of a character? If
4912
* this is UTF-16, is it the same endianness?
4913
*/
4914
if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4915
|| (newEncoding->minBytesPerChar == 2
4916
&& newEncoding != parser->m_encoding)) {
4917
parser->m_eventPtr = encodingName;
4918
return XML_ERROR_INCORRECT_ENCODING;
4919
}
4920
parser->m_encoding = newEncoding;
4921
} else if (encodingName) {
4922
enum XML_Error result;
4923
if (! storedEncName) {
4924
storedEncName = poolStoreString(
4925
&parser->m_temp2Pool, parser->m_encoding, encodingName,
4926
encodingName + XmlNameLength(parser->m_encoding, encodingName));
4927
if (! storedEncName)
4928
return XML_ERROR_NO_MEMORY;
4929
}
4930
result = handleUnknownEncoding(parser, storedEncName);
4931
poolClear(&parser->m_temp2Pool);
4932
if (result == XML_ERROR_UNKNOWN_ENCODING)
4933
parser->m_eventPtr = encodingName;
4934
return result;
4935
}
4936
}
4937
4938
if (storedEncName || storedversion)
4939
poolClear(&parser->m_temp2Pool);
4940
4941
return XML_ERROR_NONE;
4942
}
4943
4944
static enum XML_Error
4945
handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4946
if (parser->m_unknownEncodingHandler) {
4947
XML_Encoding info;
4948
int i;
4949
for (i = 0; i < 256; i++)
4950
info.map[i] = -1;
4951
info.convert = NULL;
4952
info.data = NULL;
4953
info.release = NULL;
4954
if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4955
encodingName, &info)) {
4956
ENCODING *enc;
4957
parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4958
if (! parser->m_unknownEncodingMem) {
4959
if (info.release)
4960
info.release(info.data);
4961
return XML_ERROR_NO_MEMORY;
4962
}
4963
enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4964
parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4965
if (enc) {
4966
parser->m_unknownEncodingData = info.data;
4967
parser->m_unknownEncodingRelease = info.release;
4968
parser->m_encoding = enc;
4969
return XML_ERROR_NONE;
4970
}
4971
}
4972
if (info.release != NULL)
4973
info.release(info.data);
4974
}
4975
return XML_ERROR_UNKNOWN_ENCODING;
4976
}
4977
4978
static enum XML_Error PTRCALL
4979
prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4980
const char **nextPtr) {
4981
enum XML_Error result = initializeEncoding(parser);
4982
if (result != XML_ERROR_NONE)
4983
return result;
4984
parser->m_processor = prologProcessor;
4985
return prologProcessor(parser, s, end, nextPtr);
4986
}
4987
4988
#ifdef XML_DTD
4989
4990
static enum XML_Error PTRCALL
4991
externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4992
const char **nextPtr) {
4993
enum XML_Error result = initializeEncoding(parser);
4994
if (result != XML_ERROR_NONE)
4995
return result;
4996
4997
/* we know now that XML_Parse(Buffer) has been called,
4998
so we consider the external parameter entity read */
4999
parser->m_dtd->paramEntityRead = XML_TRUE;
5000
5001
if (parser->m_prologState.inEntityValue) {
5002
parser->m_processor = entityValueInitProcessor;
5003
return entityValueInitProcessor(parser, s, end, nextPtr);
5004
} else {
5005
parser->m_processor = externalParEntProcessor;
5006
return externalParEntProcessor(parser, s, end, nextPtr);
5007
}
5008
}
5009
5010
static enum XML_Error PTRCALL
5011
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
5012
const char **nextPtr) {
5013
int tok;
5014
const char *start = s;
5015
const char *next = start;
5016
parser->m_eventPtr = start;
5017
5018
for (;;) {
5019
tok = XmlPrologTok(parser->m_encoding, start, end, &next);
5020
/* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
5021
- storeEntityValue
5022
- processXmlDecl
5023
*/
5024
parser->m_eventEndPtr = next;
5025
if (tok <= 0) {
5026
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5027
*nextPtr = s;
5028
return XML_ERROR_NONE;
5029
}
5030
switch (tok) {
5031
case XML_TOK_INVALID:
5032
return XML_ERROR_INVALID_TOKEN;
5033
case XML_TOK_PARTIAL:
5034
return XML_ERROR_UNCLOSED_TOKEN;
5035
case XML_TOK_PARTIAL_CHAR:
5036
return XML_ERROR_PARTIAL_CHAR;
5037
case XML_TOK_NONE: /* start == end */
5038
default:
5039
break;
5040
}
5041
/* found end of entity value - can store it now */
5042
return storeEntityValue(parser, parser->m_encoding, s, end,
5043
XML_ACCOUNT_DIRECT, NULL);
5044
} else if (tok == XML_TOK_XML_DECL) {
5045
enum XML_Error result;
5046
result = processXmlDecl(parser, 0, start, next);
5047
if (result != XML_ERROR_NONE)
5048
return result;
5049
/* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
5050
* that to happen, a parameter entity parsing handler must have attempted
5051
* to suspend the parser, which fails and raises an error. The parser can
5052
* be aborted, but can't be suspended.
5053
*/
5054
if (parser->m_parsingStatus.parsing == XML_FINISHED)
5055
return XML_ERROR_ABORTED;
5056
*nextPtr = next;
5057
/* stop scanning for text declaration - we found one */
5058
parser->m_processor = entityValueProcessor;
5059
return entityValueProcessor(parser, next, end, nextPtr);
5060
}
5061
/* XmlPrologTok has now set the encoding based on the BOM it found, and we
5062
must move s and nextPtr forward to consume the BOM.
5063
5064
If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
5065
would leave the BOM in the buffer and return. On the next call to this
5066
function, our XmlPrologTok call would return XML_TOK_INVALID, since it
5067
is not valid to have multiple BOMs.
5068
*/
5069
else if (tok == XML_TOK_BOM) {
5070
# if XML_GE == 1
5071
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5072
XML_ACCOUNT_DIRECT)) {
5073
accountingOnAbort(parser);
5074
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5075
}
5076
# endif
5077
5078
*nextPtr = next;
5079
s = next;
5080
}
5081
/* If we get this token, we have the start of what might be a
5082
normal tag, but not a declaration (i.e. it doesn't begin with
5083
"<!"). In a DTD context, that isn't legal.
5084
*/
5085
else if (tok == XML_TOK_INSTANCE_START) {
5086
*nextPtr = next;
5087
return XML_ERROR_SYNTAX;
5088
}
5089
start = next;
5090
parser->m_eventPtr = start;
5091
}
5092
}
5093
5094
static enum XML_Error PTRCALL
5095
externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
5096
const char **nextPtr) {
5097
const char *next = s;
5098
int tok;
5099
5100
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5101
if (tok <= 0) {
5102
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5103
*nextPtr = s;
5104
return XML_ERROR_NONE;
5105
}
5106
switch (tok) {
5107
case XML_TOK_INVALID:
5108
return XML_ERROR_INVALID_TOKEN;
5109
case XML_TOK_PARTIAL:
5110
return XML_ERROR_UNCLOSED_TOKEN;
5111
case XML_TOK_PARTIAL_CHAR:
5112
return XML_ERROR_PARTIAL_CHAR;
5113
case XML_TOK_NONE: /* start == end */
5114
default:
5115
break;
5116
}
5117
}
5118
/* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
5119
However, when parsing an external subset, doProlog will not accept a BOM
5120
as valid, and report a syntax error, so we have to skip the BOM, and
5121
account for the BOM bytes.
5122
*/
5123
else if (tok == XML_TOK_BOM) {
5124
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5125
XML_ACCOUNT_DIRECT)) {
5126
accountingOnAbort(parser);
5127
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5128
}
5129
5130
s = next;
5131
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5132
}
5133
5134
parser->m_processor = prologProcessor;
5135
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5136
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5137
XML_ACCOUNT_DIRECT);
5138
}
5139
5140
static enum XML_Error PTRCALL
5141
entityValueProcessor(XML_Parser parser, const char *s, const char *end,
5142
const char **nextPtr) {
5143
const char *start = s;
5144
const char *next = s;
5145
const ENCODING *enc = parser->m_encoding;
5146
int tok;
5147
5148
for (;;) {
5149
tok = XmlPrologTok(enc, start, end, &next);
5150
/* Note: These bytes are accounted later in:
5151
- storeEntityValue
5152
*/
5153
if (tok <= 0) {
5154
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5155
*nextPtr = s;
5156
return XML_ERROR_NONE;
5157
}
5158
switch (tok) {
5159
case XML_TOK_INVALID:
5160
return XML_ERROR_INVALID_TOKEN;
5161
case XML_TOK_PARTIAL:
5162
return XML_ERROR_UNCLOSED_TOKEN;
5163
case XML_TOK_PARTIAL_CHAR:
5164
return XML_ERROR_PARTIAL_CHAR;
5165
case XML_TOK_NONE: /* start == end */
5166
default:
5167
break;
5168
}
5169
/* found end of entity value - can store it now */
5170
return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
5171
}
5172
start = next;
5173
}
5174
}
5175
5176
#endif /* XML_DTD */
5177
5178
static enum XML_Error PTRCALL
5179
prologProcessor(XML_Parser parser, const char *s, const char *end,
5180
const char **nextPtr) {
5181
const char *next = s;
5182
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5183
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5184
(XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5185
XML_ACCOUNT_DIRECT);
5186
}
5187
5188
static enum XML_Error
5189
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
5190
int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
5191
XML_Bool allowClosingDoctype, enum XML_Account account) {
5192
#ifdef XML_DTD
5193
static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
5194
#endif /* XML_DTD */
5195
static const XML_Char atypeCDATA[]
5196
= {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
5197
static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
5198
static const XML_Char atypeIDREF[]
5199
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
5200
static const XML_Char atypeIDREFS[]
5201
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
5202
static const XML_Char atypeENTITY[]
5203
= {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
5204
static const XML_Char atypeENTITIES[]
5205
= {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
5206
ASCII_I, ASCII_E, ASCII_S, '\0'};
5207
static const XML_Char atypeNMTOKEN[]
5208
= {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
5209
static const XML_Char atypeNMTOKENS[]
5210
= {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
5211
ASCII_E, ASCII_N, ASCII_S, '\0'};
5212
static const XML_Char notationPrefix[]
5213
= {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
5214
ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
5215
static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
5216
static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
5217
5218
#ifndef XML_DTD
5219
UNUSED_P(account);
5220
#endif
5221
5222
/* save one level of indirection */
5223
DTD *const dtd = parser->m_dtd;
5224
5225
const char **eventPP;
5226
const char **eventEndPP;
5227
enum XML_Content_Quant quant;
5228
5229
if (enc == parser->m_encoding) {
5230
eventPP = &parser->m_eventPtr;
5231
eventEndPP = &parser->m_eventEndPtr;
5232
} else {
5233
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5234
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5235
}
5236
5237
for (;;) {
5238
int role;
5239
XML_Bool handleDefault = XML_TRUE;
5240
*eventPP = s;
5241
*eventEndPP = next;
5242
if (tok <= 0) {
5243
if (haveMore && tok != XML_TOK_INVALID) {
5244
*nextPtr = s;
5245
return XML_ERROR_NONE;
5246
}
5247
switch (tok) {
5248
case XML_TOK_INVALID:
5249
*eventPP = next;
5250
return XML_ERROR_INVALID_TOKEN;
5251
case XML_TOK_PARTIAL:
5252
return XML_ERROR_UNCLOSED_TOKEN;
5253
case XML_TOK_PARTIAL_CHAR:
5254
return XML_ERROR_PARTIAL_CHAR;
5255
case -XML_TOK_PROLOG_S:
5256
tok = -tok;
5257
break;
5258
case XML_TOK_NONE:
5259
#ifdef XML_DTD
5260
/* for internal PE NOT referenced between declarations */
5261
if (enc != parser->m_encoding
5262
&& ! parser->m_openInternalEntities->betweenDecl) {
5263
*nextPtr = s;
5264
return XML_ERROR_NONE;
5265
}
5266
/* WFC: PE Between Declarations - must check that PE contains
5267
complete markup, not only for external PEs, but also for
5268
internal PEs if the reference occurs between declarations.
5269
*/
5270
if (parser->m_isParamEntity || enc != parser->m_encoding) {
5271
if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
5272
== XML_ROLE_ERROR)
5273
return XML_ERROR_INCOMPLETE_PE;
5274
*nextPtr = s;
5275
return XML_ERROR_NONE;
5276
}
5277
#endif /* XML_DTD */
5278
return XML_ERROR_NO_ELEMENTS;
5279
default:
5280
tok = -tok;
5281
next = end;
5282
break;
5283
}
5284
}
5285
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
5286
#if XML_GE == 1
5287
switch (role) {
5288
case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
5289
case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
5290
# ifdef XML_DTD
5291
case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
5292
# endif
5293
break;
5294
default:
5295
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
5296
accountingOnAbort(parser);
5297
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5298
}
5299
}
5300
#endif
5301
switch (role) {
5302
case XML_ROLE_XML_DECL: {
5303
enum XML_Error result = processXmlDecl(parser, 0, s, next);
5304
if (result != XML_ERROR_NONE)
5305
return result;
5306
enc = parser->m_encoding;
5307
handleDefault = XML_FALSE;
5308
} break;
5309
case XML_ROLE_DOCTYPE_NAME:
5310
if (parser->m_startDoctypeDeclHandler) {
5311
parser->m_doctypeName
5312
= poolStoreString(&parser->m_tempPool, enc, s, next);
5313
if (! parser->m_doctypeName)
5314
return XML_ERROR_NO_MEMORY;
5315
poolFinish(&parser->m_tempPool);
5316
parser->m_doctypePubid = NULL;
5317
handleDefault = XML_FALSE;
5318
}
5319
parser->m_doctypeSysid = NULL; /* always initialize to NULL */
5320
break;
5321
case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
5322
if (parser->m_startDoctypeDeclHandler) {
5323
parser->m_startDoctypeDeclHandler(
5324
parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5325
parser->m_doctypePubid, 1);
5326
parser->m_doctypeName = NULL;
5327
poolClear(&parser->m_tempPool);
5328
handleDefault = XML_FALSE;
5329
}
5330
break;
5331
#ifdef XML_DTD
5332
case XML_ROLE_TEXT_DECL: {
5333
enum XML_Error result = processXmlDecl(parser, 1, s, next);
5334
if (result != XML_ERROR_NONE)
5335
return result;
5336
enc = parser->m_encoding;
5337
handleDefault = XML_FALSE;
5338
} break;
5339
#endif /* XML_DTD */
5340
case XML_ROLE_DOCTYPE_PUBLIC_ID:
5341
#ifdef XML_DTD
5342
parser->m_useForeignDTD = XML_FALSE;
5343
parser->m_declEntity = (ENTITY *)lookup(
5344
parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5345
if (! parser->m_declEntity)
5346
return XML_ERROR_NO_MEMORY;
5347
#endif /* XML_DTD */
5348
dtd->hasParamEntityRefs = XML_TRUE;
5349
if (parser->m_startDoctypeDeclHandler) {
5350
XML_Char *pubId;
5351
if (! XmlIsPublicId(enc, s, next, eventPP))
5352
return XML_ERROR_PUBLICID;
5353
pubId = poolStoreString(&parser->m_tempPool, enc,
5354
s + enc->minBytesPerChar,
5355
next - enc->minBytesPerChar);
5356
if (! pubId)
5357
return XML_ERROR_NO_MEMORY;
5358
normalizePublicId(pubId);
5359
poolFinish(&parser->m_tempPool);
5360
parser->m_doctypePubid = pubId;
5361
handleDefault = XML_FALSE;
5362
goto alreadyChecked;
5363
}
5364
/* fall through */
5365
case XML_ROLE_ENTITY_PUBLIC_ID:
5366
if (! XmlIsPublicId(enc, s, next, eventPP))
5367
return XML_ERROR_PUBLICID;
5368
alreadyChecked:
5369
if (dtd->keepProcessing && parser->m_declEntity) {
5370
XML_Char *tem
5371
= poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5372
next - enc->minBytesPerChar);
5373
if (! tem)
5374
return XML_ERROR_NO_MEMORY;
5375
normalizePublicId(tem);
5376
parser->m_declEntity->publicId = tem;
5377
poolFinish(&dtd->pool);
5378
/* Don't suppress the default handler if we fell through from
5379
* the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5380
*/
5381
if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5382
handleDefault = XML_FALSE;
5383
}
5384
break;
5385
case XML_ROLE_DOCTYPE_CLOSE:
5386
if (allowClosingDoctype != XML_TRUE) {
5387
/* Must not close doctype from within expanded parameter entities */
5388
return XML_ERROR_INVALID_TOKEN;
5389
}
5390
5391
if (parser->m_doctypeName) {
5392
parser->m_startDoctypeDeclHandler(
5393
parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5394
parser->m_doctypePubid, 0);
5395
poolClear(&parser->m_tempPool);
5396
handleDefault = XML_FALSE;
5397
}
5398
/* parser->m_doctypeSysid will be non-NULL in the case of a previous
5399
XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5400
was not set, indicating an external subset
5401
*/
5402
#ifdef XML_DTD
5403
if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5404
XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5405
dtd->hasParamEntityRefs = XML_TRUE;
5406
if (parser->m_paramEntityParsing
5407
&& parser->m_externalEntityRefHandler) {
5408
ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5409
externalSubsetName, sizeof(ENTITY));
5410
if (! entity) {
5411
/* The external subset name "#" will have already been
5412
* inserted into the hash table at the start of the
5413
* external entity parsing, so no allocation will happen
5414
* and lookup() cannot fail.
5415
*/
5416
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5417
}
5418
if (parser->m_useForeignDTD)
5419
entity->base = parser->m_curBase;
5420
dtd->paramEntityRead = XML_FALSE;
5421
if (! parser->m_externalEntityRefHandler(
5422
parser->m_externalEntityRefHandlerArg, 0, entity->base,
5423
entity->systemId, entity->publicId))
5424
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5425
if (dtd->paramEntityRead) {
5426
if (! dtd->standalone && parser->m_notStandaloneHandler
5427
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5428
return XML_ERROR_NOT_STANDALONE;
5429
}
5430
/* if we didn't read the foreign DTD then this means that there
5431
is no external subset and we must reset dtd->hasParamEntityRefs
5432
*/
5433
else if (! parser->m_doctypeSysid)
5434
dtd->hasParamEntityRefs = hadParamEntityRefs;
5435
/* end of DTD - no need to update dtd->keepProcessing */
5436
}
5437
parser->m_useForeignDTD = XML_FALSE;
5438
}
5439
#endif /* XML_DTD */
5440
if (parser->m_endDoctypeDeclHandler) {
5441
parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5442
handleDefault = XML_FALSE;
5443
}
5444
break;
5445
case XML_ROLE_INSTANCE_START:
5446
#ifdef XML_DTD
5447
/* if there is no DOCTYPE declaration then now is the
5448
last chance to read the foreign DTD
5449
*/
5450
if (parser->m_useForeignDTD) {
5451
XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5452
dtd->hasParamEntityRefs = XML_TRUE;
5453
if (parser->m_paramEntityParsing
5454
&& parser->m_externalEntityRefHandler) {
5455
ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5456
externalSubsetName, sizeof(ENTITY));
5457
if (! entity)
5458
return XML_ERROR_NO_MEMORY;
5459
entity->base = parser->m_curBase;
5460
dtd->paramEntityRead = XML_FALSE;
5461
if (! parser->m_externalEntityRefHandler(
5462
parser->m_externalEntityRefHandlerArg, 0, entity->base,
5463
entity->systemId, entity->publicId))
5464
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5465
if (dtd->paramEntityRead) {
5466
if (! dtd->standalone && parser->m_notStandaloneHandler
5467
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5468
return XML_ERROR_NOT_STANDALONE;
5469
}
5470
/* if we didn't read the foreign DTD then this means that there
5471
is no external subset and we must reset dtd->hasParamEntityRefs
5472
*/
5473
else
5474
dtd->hasParamEntityRefs = hadParamEntityRefs;
5475
/* end of DTD - no need to update dtd->keepProcessing */
5476
}
5477
}
5478
#endif /* XML_DTD */
5479
parser->m_processor = contentProcessor;
5480
return contentProcessor(parser, s, end, nextPtr);
5481
case XML_ROLE_ATTLIST_ELEMENT_NAME:
5482
parser->m_declElementType = getElementType(parser, enc, s, next);
5483
if (! parser->m_declElementType)
5484
return XML_ERROR_NO_MEMORY;
5485
goto checkAttListDeclHandler;
5486
case XML_ROLE_ATTRIBUTE_NAME:
5487
parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5488
if (! parser->m_declAttributeId)
5489
return XML_ERROR_NO_MEMORY;
5490
parser->m_declAttributeIsCdata = XML_FALSE;
5491
parser->m_declAttributeType = NULL;
5492
parser->m_declAttributeIsId = XML_FALSE;
5493
goto checkAttListDeclHandler;
5494
case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5495
parser->m_declAttributeIsCdata = XML_TRUE;
5496
parser->m_declAttributeType = atypeCDATA;
5497
goto checkAttListDeclHandler;
5498
case XML_ROLE_ATTRIBUTE_TYPE_ID:
5499
parser->m_declAttributeIsId = XML_TRUE;
5500
parser->m_declAttributeType = atypeID;
5501
goto checkAttListDeclHandler;
5502
case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5503
parser->m_declAttributeType = atypeIDREF;
5504
goto checkAttListDeclHandler;
5505
case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5506
parser->m_declAttributeType = atypeIDREFS;
5507
goto checkAttListDeclHandler;
5508
case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5509
parser->m_declAttributeType = atypeENTITY;
5510
goto checkAttListDeclHandler;
5511
case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5512
parser->m_declAttributeType = atypeENTITIES;
5513
goto checkAttListDeclHandler;
5514
case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5515
parser->m_declAttributeType = atypeNMTOKEN;
5516
goto checkAttListDeclHandler;
5517
case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5518
parser->m_declAttributeType = atypeNMTOKENS;
5519
checkAttListDeclHandler:
5520
if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5521
handleDefault = XML_FALSE;
5522
break;
5523
case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5524
case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5525
if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5526
const XML_Char *prefix;
5527
if (parser->m_declAttributeType) {
5528
prefix = enumValueSep;
5529
} else {
5530
prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5531
: enumValueStart);
5532
}
5533
if (! poolAppendString(&parser->m_tempPool, prefix))
5534
return XML_ERROR_NO_MEMORY;
5535
if (! poolAppend(&parser->m_tempPool, enc, s, next))
5536
return XML_ERROR_NO_MEMORY;
5537
parser->m_declAttributeType = parser->m_tempPool.start;
5538
handleDefault = XML_FALSE;
5539
}
5540
break;
5541
case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5542
case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5543
if (dtd->keepProcessing) {
5544
if (! defineAttribute(parser->m_declElementType,
5545
parser->m_declAttributeId,
5546
parser->m_declAttributeIsCdata,
5547
parser->m_declAttributeIsId, 0, parser))
5548
return XML_ERROR_NO_MEMORY;
5549
if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5550
if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5551
|| (*parser->m_declAttributeType == XML_T(ASCII_N)
5552
&& parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5553
/* Enumerated or Notation type */
5554
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5555
|| ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5556
return XML_ERROR_NO_MEMORY;
5557
parser->m_declAttributeType = parser->m_tempPool.start;
5558
poolFinish(&parser->m_tempPool);
5559
}
5560
*eventEndPP = s;
5561
parser->m_attlistDeclHandler(
5562
parser->m_handlerArg, parser->m_declElementType->name,
5563
parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5564
role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5565
handleDefault = XML_FALSE;
5566
}
5567
}
5568
poolClear(&parser->m_tempPool);
5569
break;
5570
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5571
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5572
if (dtd->keepProcessing) {
5573
const XML_Char *attVal;
5574
enum XML_Error result = storeAttributeValue(
5575
parser, enc, parser->m_declAttributeIsCdata,
5576
s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5577
XML_ACCOUNT_NONE);
5578
if (result)
5579
return result;
5580
attVal = poolStart(&dtd->pool);
5581
poolFinish(&dtd->pool);
5582
/* ID attributes aren't allowed to have a default */
5583
if (! defineAttribute(
5584
parser->m_declElementType, parser->m_declAttributeId,
5585
parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5586
return XML_ERROR_NO_MEMORY;
5587
if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5588
if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5589
|| (*parser->m_declAttributeType == XML_T(ASCII_N)
5590
&& parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5591
/* Enumerated or Notation type */
5592
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5593
|| ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5594
return XML_ERROR_NO_MEMORY;
5595
parser->m_declAttributeType = parser->m_tempPool.start;
5596
poolFinish(&parser->m_tempPool);
5597
}
5598
*eventEndPP = s;
5599
parser->m_attlistDeclHandler(
5600
parser->m_handlerArg, parser->m_declElementType->name,
5601
parser->m_declAttributeId->name, parser->m_declAttributeType,
5602
attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5603
poolClear(&parser->m_tempPool);
5604
handleDefault = XML_FALSE;
5605
}
5606
}
5607
break;
5608
case XML_ROLE_ENTITY_VALUE:
5609
if (dtd->keepProcessing) {
5610
#if XML_GE == 1
5611
// This will store the given replacement text in
5612
// parser->m_declEntity->textPtr.
5613
enum XML_Error result = callStoreEntityValue(
5614
parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5615
XML_ACCOUNT_NONE);
5616
if (parser->m_declEntity) {
5617
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5618
parser->m_declEntity->textLen
5619
= (int)(poolLength(&dtd->entityValuePool));
5620
poolFinish(&dtd->entityValuePool);
5621
if (parser->m_entityDeclHandler) {
5622
*eventEndPP = s;
5623
parser->m_entityDeclHandler(
5624
parser->m_handlerArg, parser->m_declEntity->name,
5625
parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5626
parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5627
handleDefault = XML_FALSE;
5628
}
5629
} else
5630
poolDiscard(&dtd->entityValuePool);
5631
if (result != XML_ERROR_NONE)
5632
return result;
5633
#else
5634
// This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5635
// to end up as "&entity123;" in the handler.
5636
if (parser->m_declEntity != NULL) {
5637
const enum XML_Error result
5638
= storeSelfEntityValue(parser, parser->m_declEntity);
5639
if (result != XML_ERROR_NONE)
5640
return result;
5641
5642
if (parser->m_entityDeclHandler) {
5643
*eventEndPP = s;
5644
parser->m_entityDeclHandler(
5645
parser->m_handlerArg, parser->m_declEntity->name,
5646
parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5647
parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5648
handleDefault = XML_FALSE;
5649
}
5650
}
5651
#endif
5652
}
5653
break;
5654
case XML_ROLE_DOCTYPE_SYSTEM_ID:
5655
#ifdef XML_DTD
5656
parser->m_useForeignDTD = XML_FALSE;
5657
#endif /* XML_DTD */
5658
dtd->hasParamEntityRefs = XML_TRUE;
5659
if (parser->m_startDoctypeDeclHandler) {
5660
parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5661
s + enc->minBytesPerChar,
5662
next - enc->minBytesPerChar);
5663
if (parser->m_doctypeSysid == NULL)
5664
return XML_ERROR_NO_MEMORY;
5665
poolFinish(&parser->m_tempPool);
5666
handleDefault = XML_FALSE;
5667
}
5668
#ifdef XML_DTD
5669
else
5670
/* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5671
for the case where no parser->m_startDoctypeDeclHandler is set */
5672
parser->m_doctypeSysid = externalSubsetName;
5673
#endif /* XML_DTD */
5674
if (! dtd->standalone
5675
#ifdef XML_DTD
5676
&& ! parser->m_paramEntityParsing
5677
#endif /* XML_DTD */
5678
&& parser->m_notStandaloneHandler
5679
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5680
return XML_ERROR_NOT_STANDALONE;
5681
#ifndef XML_DTD
5682
break;
5683
#else /* XML_DTD */
5684
if (! parser->m_declEntity) {
5685
parser->m_declEntity = (ENTITY *)lookup(
5686
parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5687
if (! parser->m_declEntity)
5688
return XML_ERROR_NO_MEMORY;
5689
parser->m_declEntity->publicId = NULL;
5690
}
5691
#endif /* XML_DTD */
5692
/* fall through */
5693
case XML_ROLE_ENTITY_SYSTEM_ID:
5694
if (dtd->keepProcessing && parser->m_declEntity) {
5695
parser->m_declEntity->systemId
5696
= poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5697
next - enc->minBytesPerChar);
5698
if (! parser->m_declEntity->systemId)
5699
return XML_ERROR_NO_MEMORY;
5700
parser->m_declEntity->base = parser->m_curBase;
5701
poolFinish(&dtd->pool);
5702
/* Don't suppress the default handler if we fell through from
5703
* the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5704
*/
5705
if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5706
handleDefault = XML_FALSE;
5707
}
5708
break;
5709
case XML_ROLE_ENTITY_COMPLETE:
5710
#if XML_GE == 0
5711
// This will store "&amp;entity123;" in entity->textPtr
5712
// to end up as "&entity123;" in the handler.
5713
if (parser->m_declEntity != NULL) {
5714
const enum XML_Error result
5715
= storeSelfEntityValue(parser, parser->m_declEntity);
5716
if (result != XML_ERROR_NONE)
5717
return result;
5718
}
5719
#endif
5720
if (dtd->keepProcessing && parser->m_declEntity
5721
&& parser->m_entityDeclHandler) {
5722
*eventEndPP = s;
5723
parser->m_entityDeclHandler(
5724
parser->m_handlerArg, parser->m_declEntity->name,
5725
parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5726
parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5727
handleDefault = XML_FALSE;
5728
}
5729
break;
5730
case XML_ROLE_ENTITY_NOTATION_NAME:
5731
if (dtd->keepProcessing && parser->m_declEntity) {
5732
parser->m_declEntity->notation
5733
= poolStoreString(&dtd->pool, enc, s, next);
5734
if (! parser->m_declEntity->notation)
5735
return XML_ERROR_NO_MEMORY;
5736
poolFinish(&dtd->pool);
5737
if (parser->m_unparsedEntityDeclHandler) {
5738
*eventEndPP = s;
5739
parser->m_unparsedEntityDeclHandler(
5740
parser->m_handlerArg, parser->m_declEntity->name,
5741
parser->m_declEntity->base, parser->m_declEntity->systemId,
5742
parser->m_declEntity->publicId, parser->m_declEntity->notation);
5743
handleDefault = XML_FALSE;
5744
} else if (parser->m_entityDeclHandler) {
5745
*eventEndPP = s;
5746
parser->m_entityDeclHandler(
5747
parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5748
parser->m_declEntity->base, parser->m_declEntity->systemId,
5749
parser->m_declEntity->publicId, parser->m_declEntity->notation);
5750
handleDefault = XML_FALSE;
5751
}
5752
}
5753
break;
5754
case XML_ROLE_GENERAL_ENTITY_NAME: {
5755
if (XmlPredefinedEntityName(enc, s, next)) {
5756
parser->m_declEntity = NULL;
5757
break;
5758
}
5759
if (dtd->keepProcessing) {
5760
const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5761
if (! name)
5762
return XML_ERROR_NO_MEMORY;
5763
parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5764
name, sizeof(ENTITY));
5765
if (! parser->m_declEntity)
5766
return XML_ERROR_NO_MEMORY;
5767
if (parser->m_declEntity->name != name) {
5768
poolDiscard(&dtd->pool);
5769
parser->m_declEntity = NULL;
5770
} else {
5771
poolFinish(&dtd->pool);
5772
parser->m_declEntity->publicId = NULL;
5773
parser->m_declEntity->is_param = XML_FALSE;
5774
/* if we have a parent parser or are reading an internal parameter
5775
entity, then the entity declaration is not considered "internal"
5776
*/
5777
parser->m_declEntity->is_internal
5778
= ! (parser->m_parentParser || parser->m_openInternalEntities);
5779
if (parser->m_entityDeclHandler)
5780
handleDefault = XML_FALSE;
5781
}
5782
} else {
5783
poolDiscard(&dtd->pool);
5784
parser->m_declEntity = NULL;
5785
}
5786
} break;
5787
case XML_ROLE_PARAM_ENTITY_NAME:
5788
#ifdef XML_DTD
5789
if (dtd->keepProcessing) {
5790
const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5791
if (! name)
5792
return XML_ERROR_NO_MEMORY;
5793
parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5794
name, sizeof(ENTITY));
5795
if (! parser->m_declEntity)
5796
return XML_ERROR_NO_MEMORY;
5797
if (parser->m_declEntity->name != name) {
5798
poolDiscard(&dtd->pool);
5799
parser->m_declEntity = NULL;
5800
} else {
5801
poolFinish(&dtd->pool);
5802
parser->m_declEntity->publicId = NULL;
5803
parser->m_declEntity->is_param = XML_TRUE;
5804
/* if we have a parent parser or are reading an internal parameter
5805
entity, then the entity declaration is not considered "internal"
5806
*/
5807
parser->m_declEntity->is_internal
5808
= ! (parser->m_parentParser || parser->m_openInternalEntities);
5809
if (parser->m_entityDeclHandler)
5810
handleDefault = XML_FALSE;
5811
}
5812
} else {
5813
poolDiscard(&dtd->pool);
5814
parser->m_declEntity = NULL;
5815
}
5816
#else /* not XML_DTD */
5817
parser->m_declEntity = NULL;
5818
#endif /* XML_DTD */
5819
break;
5820
case XML_ROLE_NOTATION_NAME:
5821
parser->m_declNotationPublicId = NULL;
5822
parser->m_declNotationName = NULL;
5823
if (parser->m_notationDeclHandler) {
5824
parser->m_declNotationName
5825
= poolStoreString(&parser->m_tempPool, enc, s, next);
5826
if (! parser->m_declNotationName)
5827
return XML_ERROR_NO_MEMORY;
5828
poolFinish(&parser->m_tempPool);
5829
handleDefault = XML_FALSE;
5830
}
5831
break;
5832
case XML_ROLE_NOTATION_PUBLIC_ID:
5833
if (! XmlIsPublicId(enc, s, next, eventPP))
5834
return XML_ERROR_PUBLICID;
5835
if (parser
5836
->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5837
XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5838
s + enc->minBytesPerChar,
5839
next - enc->minBytesPerChar);
5840
if (! tem)
5841
return XML_ERROR_NO_MEMORY;
5842
normalizePublicId(tem);
5843
parser->m_declNotationPublicId = tem;
5844
poolFinish(&parser->m_tempPool);
5845
handleDefault = XML_FALSE;
5846
}
5847
break;
5848
case XML_ROLE_NOTATION_SYSTEM_ID:
5849
if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5850
const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5851
s + enc->minBytesPerChar,
5852
next - enc->minBytesPerChar);
5853
if (! systemId)
5854
return XML_ERROR_NO_MEMORY;
5855
*eventEndPP = s;
5856
parser->m_notationDeclHandler(
5857
parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5858
systemId, parser->m_declNotationPublicId);
5859
handleDefault = XML_FALSE;
5860
}
5861
poolClear(&parser->m_tempPool);
5862
break;
5863
case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5864
if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5865
*eventEndPP = s;
5866
parser->m_notationDeclHandler(
5867
parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5868
0, parser->m_declNotationPublicId);
5869
handleDefault = XML_FALSE;
5870
}
5871
poolClear(&parser->m_tempPool);
5872
break;
5873
case XML_ROLE_ERROR:
5874
switch (tok) {
5875
case XML_TOK_PARAM_ENTITY_REF:
5876
/* PE references in internal subset are
5877
not allowed within declarations. */
5878
return XML_ERROR_PARAM_ENTITY_REF;
5879
case XML_TOK_XML_DECL:
5880
return XML_ERROR_MISPLACED_XML_PI;
5881
default:
5882
return XML_ERROR_SYNTAX;
5883
}
5884
#ifdef XML_DTD
5885
case XML_ROLE_IGNORE_SECT: {
5886
enum XML_Error result;
5887
if (parser->m_defaultHandler)
5888
reportDefault(parser, enc, s, next);
5889
handleDefault = XML_FALSE;
5890
result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5891
if (result != XML_ERROR_NONE)
5892
return result;
5893
else if (! next) {
5894
parser->m_processor = ignoreSectionProcessor;
5895
return result;
5896
}
5897
} break;
5898
#endif /* XML_DTD */
5899
case XML_ROLE_GROUP_OPEN:
5900
if (parser->m_prologState.level >= parser->m_groupSize) {
5901
if (parser->m_groupSize) {
5902
{
5903
/* Detect and prevent integer overflow */
5904
if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5905
return XML_ERROR_NO_MEMORY;
5906
}
5907
5908
char *const new_connector = REALLOC(
5909
parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5910
if (new_connector == NULL) {
5911
parser->m_groupSize /= 2;
5912
return XML_ERROR_NO_MEMORY;
5913
}
5914
parser->m_groupConnector = new_connector;
5915
}
5916
5917
if (dtd->scaffIndex) {
5918
/* Detect and prevent integer overflow.
5919
* The preprocessor guard addresses the "always false" warning
5920
* from -Wtype-limits on platforms where
5921
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5922
#if UINT_MAX >= SIZE_MAX
5923
if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5924
return XML_ERROR_NO_MEMORY;
5925
}
5926
#endif
5927
5928
int *const new_scaff_index = REALLOC(
5929
parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5930
if (new_scaff_index == NULL)
5931
return XML_ERROR_NO_MEMORY;
5932
dtd->scaffIndex = new_scaff_index;
5933
}
5934
} else {
5935
parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32);
5936
if (! parser->m_groupConnector) {
5937
parser->m_groupSize = 0;
5938
return XML_ERROR_NO_MEMORY;
5939
}
5940
}
5941
}
5942
parser->m_groupConnector[parser->m_prologState.level] = 0;
5943
if (dtd->in_eldecl) {
5944
int myindex = nextScaffoldPart(parser);
5945
if (myindex < 0)
5946
return XML_ERROR_NO_MEMORY;
5947
assert(dtd->scaffIndex != NULL);
5948
dtd->scaffIndex[dtd->scaffLevel] = myindex;
5949
dtd->scaffLevel++;
5950
dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5951
if (parser->m_elementDeclHandler)
5952
handleDefault = XML_FALSE;
5953
}
5954
break;
5955
case XML_ROLE_GROUP_SEQUENCE:
5956
if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5957
return XML_ERROR_SYNTAX;
5958
parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5959
if (dtd->in_eldecl && parser->m_elementDeclHandler)
5960
handleDefault = XML_FALSE;
5961
break;
5962
case XML_ROLE_GROUP_CHOICE:
5963
if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5964
return XML_ERROR_SYNTAX;
5965
if (dtd->in_eldecl
5966
&& ! parser->m_groupConnector[parser->m_prologState.level]
5967
&& (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5968
!= XML_CTYPE_MIXED)) {
5969
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5970
= XML_CTYPE_CHOICE;
5971
if (parser->m_elementDeclHandler)
5972
handleDefault = XML_FALSE;
5973
}
5974
parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5975
break;
5976
case XML_ROLE_PARAM_ENTITY_REF:
5977
#ifdef XML_DTD
5978
case XML_ROLE_INNER_PARAM_ENTITY_REF:
5979
dtd->hasParamEntityRefs = XML_TRUE;
5980
if (! parser->m_paramEntityParsing)
5981
dtd->keepProcessing = dtd->standalone;
5982
else {
5983
const XML_Char *name;
5984
ENTITY *entity;
5985
name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5986
next - enc->minBytesPerChar);
5987
if (! name)
5988
return XML_ERROR_NO_MEMORY;
5989
entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5990
poolDiscard(&dtd->pool);
5991
/* first, determine if a check for an existing declaration is needed;
5992
if yes, check that the entity exists, and that it is internal,
5993
otherwise call the skipped entity handler
5994
*/
5995
if (parser->m_prologState.documentEntity
5996
&& (dtd->standalone ? ! parser->m_openInternalEntities
5997
: ! dtd->hasParamEntityRefs)) {
5998
if (! entity)
5999
return XML_ERROR_UNDEFINED_ENTITY;
6000
else if (! entity->is_internal) {
6001
/* It's hard to exhaustively search the code to be sure,
6002
* but there doesn't seem to be a way of executing the
6003
* following line. There are two cases:
6004
*
6005
* If 'standalone' is false, the DTD must have no
6006
* parameter entities or we wouldn't have passed the outer
6007
* 'if' statement. That means the only entity in the hash
6008
* table is the external subset name "#" which cannot be
6009
* given as a parameter entity name in XML syntax, so the
6010
* lookup must have returned NULL and we don't even reach
6011
* the test for an internal entity.
6012
*
6013
* If 'standalone' is true, it does not seem to be
6014
* possible to create entities taking this code path that
6015
* are not internal entities, so fail the test above.
6016
*
6017
* Because this analysis is very uncertain, the code is
6018
* being left in place and merely removed from the
6019
* coverage test statistics.
6020
*/
6021
return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
6022
}
6023
} else if (! entity) {
6024
dtd->keepProcessing = dtd->standalone;
6025
/* cannot report skipped entities in declarations */
6026
if ((role == XML_ROLE_PARAM_ENTITY_REF)
6027
&& parser->m_skippedEntityHandler) {
6028
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
6029
handleDefault = XML_FALSE;
6030
}
6031
break;
6032
}
6033
if (entity->open)
6034
return XML_ERROR_RECURSIVE_ENTITY_REF;
6035
if (entity->textPtr) {
6036
enum XML_Error result;
6037
XML_Bool betweenDecl
6038
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
6039
result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
6040
if (result != XML_ERROR_NONE)
6041
return result;
6042
handleDefault = XML_FALSE;
6043
break;
6044
}
6045
if (parser->m_externalEntityRefHandler) {
6046
dtd->paramEntityRead = XML_FALSE;
6047
entity->open = XML_TRUE;
6048
entityTrackingOnOpen(parser, entity, __LINE__);
6049
if (! parser->m_externalEntityRefHandler(
6050
parser->m_externalEntityRefHandlerArg, 0, entity->base,
6051
entity->systemId, entity->publicId)) {
6052
entityTrackingOnClose(parser, entity, __LINE__);
6053
entity->open = XML_FALSE;
6054
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6055
}
6056
entityTrackingOnClose(parser, entity, __LINE__);
6057
entity->open = XML_FALSE;
6058
handleDefault = XML_FALSE;
6059
if (! dtd->paramEntityRead) {
6060
dtd->keepProcessing = dtd->standalone;
6061
break;
6062
}
6063
} else {
6064
dtd->keepProcessing = dtd->standalone;
6065
break;
6066
}
6067
}
6068
#endif /* XML_DTD */
6069
if (! dtd->standalone && parser->m_notStandaloneHandler
6070
&& ! parser->m_notStandaloneHandler(parser->m_handlerArg))
6071
return XML_ERROR_NOT_STANDALONE;
6072
break;
6073
6074
/* Element declaration stuff */
6075
6076
case XML_ROLE_ELEMENT_NAME:
6077
if (parser->m_elementDeclHandler) {
6078
parser->m_declElementType = getElementType(parser, enc, s, next);
6079
if (! parser->m_declElementType)
6080
return XML_ERROR_NO_MEMORY;
6081
dtd->scaffLevel = 0;
6082
dtd->scaffCount = 0;
6083
dtd->in_eldecl = XML_TRUE;
6084
handleDefault = XML_FALSE;
6085
}
6086
break;
6087
6088
case XML_ROLE_CONTENT_ANY:
6089
case XML_ROLE_CONTENT_EMPTY:
6090
if (dtd->in_eldecl) {
6091
if (parser->m_elementDeclHandler) {
6092
// NOTE: We are avoiding MALLOC(..) here to so that
6093
// applications that are not using XML_FreeContentModel but
6094
// plain free(..) or .free_fcn() to free the content model's
6095
// memory are safe.
6096
XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content));
6097
if (! content)
6098
return XML_ERROR_NO_MEMORY;
6099
content->quant = XML_CQUANT_NONE;
6100
content->name = NULL;
6101
content->numchildren = 0;
6102
content->children = NULL;
6103
content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
6104
: XML_CTYPE_EMPTY);
6105
*eventEndPP = s;
6106
parser->m_elementDeclHandler(
6107
parser->m_handlerArg, parser->m_declElementType->name, content);
6108
handleDefault = XML_FALSE;
6109
}
6110
dtd->in_eldecl = XML_FALSE;
6111
}
6112
break;
6113
6114
case XML_ROLE_CONTENT_PCDATA:
6115
if (dtd->in_eldecl) {
6116
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
6117
= XML_CTYPE_MIXED;
6118
if (parser->m_elementDeclHandler)
6119
handleDefault = XML_FALSE;
6120
}
6121
break;
6122
6123
case XML_ROLE_CONTENT_ELEMENT:
6124
quant = XML_CQUANT_NONE;
6125
goto elementContent;
6126
case XML_ROLE_CONTENT_ELEMENT_OPT:
6127
quant = XML_CQUANT_OPT;
6128
goto elementContent;
6129
case XML_ROLE_CONTENT_ELEMENT_REP:
6130
quant = XML_CQUANT_REP;
6131
goto elementContent;
6132
case XML_ROLE_CONTENT_ELEMENT_PLUS:
6133
quant = XML_CQUANT_PLUS;
6134
elementContent:
6135
if (dtd->in_eldecl) {
6136
ELEMENT_TYPE *el;
6137
const XML_Char *name;
6138
size_t nameLen;
6139
const char *nxt
6140
= (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
6141
int myindex = nextScaffoldPart(parser);
6142
if (myindex < 0)
6143
return XML_ERROR_NO_MEMORY;
6144
dtd->scaffold[myindex].type = XML_CTYPE_NAME;
6145
dtd->scaffold[myindex].quant = quant;
6146
el = getElementType(parser, enc, s, nxt);
6147
if (! el)
6148
return XML_ERROR_NO_MEMORY;
6149
name = el->name;
6150
dtd->scaffold[myindex].name = name;
6151
nameLen = 0;
6152
while (name[nameLen++])
6153
;
6154
6155
/* Detect and prevent integer overflow */
6156
if (nameLen > UINT_MAX - dtd->contentStringLen) {
6157
return XML_ERROR_NO_MEMORY;
6158
}
6159
6160
dtd->contentStringLen += (unsigned)nameLen;
6161
if (parser->m_elementDeclHandler)
6162
handleDefault = XML_FALSE;
6163
}
6164
break;
6165
6166
case XML_ROLE_GROUP_CLOSE:
6167
quant = XML_CQUANT_NONE;
6168
goto closeGroup;
6169
case XML_ROLE_GROUP_CLOSE_OPT:
6170
quant = XML_CQUANT_OPT;
6171
goto closeGroup;
6172
case XML_ROLE_GROUP_CLOSE_REP:
6173
quant = XML_CQUANT_REP;
6174
goto closeGroup;
6175
case XML_ROLE_GROUP_CLOSE_PLUS:
6176
quant = XML_CQUANT_PLUS;
6177
closeGroup:
6178
if (dtd->in_eldecl) {
6179
if (parser->m_elementDeclHandler)
6180
handleDefault = XML_FALSE;
6181
dtd->scaffLevel--;
6182
dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
6183
if (dtd->scaffLevel == 0) {
6184
if (! handleDefault) {
6185
XML_Content *model = build_model(parser);
6186
if (! model)
6187
return XML_ERROR_NO_MEMORY;
6188
*eventEndPP = s;
6189
parser->m_elementDeclHandler(
6190
parser->m_handlerArg, parser->m_declElementType->name, model);
6191
}
6192
dtd->in_eldecl = XML_FALSE;
6193
dtd->contentStringLen = 0;
6194
}
6195
}
6196
break;
6197
/* End element declaration stuff */
6198
6199
case XML_ROLE_PI:
6200
if (! reportProcessingInstruction(parser, enc, s, next))
6201
return XML_ERROR_NO_MEMORY;
6202
handleDefault = XML_FALSE;
6203
break;
6204
case XML_ROLE_COMMENT:
6205
if (! reportComment(parser, enc, s, next))
6206
return XML_ERROR_NO_MEMORY;
6207
handleDefault = XML_FALSE;
6208
break;
6209
case XML_ROLE_NONE:
6210
switch (tok) {
6211
case XML_TOK_BOM:
6212
handleDefault = XML_FALSE;
6213
break;
6214
}
6215
break;
6216
case XML_ROLE_DOCTYPE_NONE:
6217
if (parser->m_startDoctypeDeclHandler)
6218
handleDefault = XML_FALSE;
6219
break;
6220
case XML_ROLE_ENTITY_NONE:
6221
if (dtd->keepProcessing && parser->m_entityDeclHandler)
6222
handleDefault = XML_FALSE;
6223
break;
6224
case XML_ROLE_NOTATION_NONE:
6225
if (parser->m_notationDeclHandler)
6226
handleDefault = XML_FALSE;
6227
break;
6228
case XML_ROLE_ATTLIST_NONE:
6229
if (dtd->keepProcessing && parser->m_attlistDeclHandler)
6230
handleDefault = XML_FALSE;
6231
break;
6232
case XML_ROLE_ELEMENT_NONE:
6233
if (parser->m_elementDeclHandler)
6234
handleDefault = XML_FALSE;
6235
break;
6236
} /* end of big switch */
6237
6238
if (handleDefault && parser->m_defaultHandler)
6239
reportDefault(parser, enc, s, next);
6240
6241
switch (parser->m_parsingStatus.parsing) {
6242
case XML_SUSPENDED:
6243
*nextPtr = next;
6244
return XML_ERROR_NONE;
6245
case XML_FINISHED:
6246
return XML_ERROR_ABORTED;
6247
case XML_PARSING:
6248
if (parser->m_reenter) {
6249
*nextPtr = next;
6250
return XML_ERROR_NONE;
6251
}
6252
/* Fall through */
6253
default:
6254
s = next;
6255
tok = XmlPrologTok(enc, s, end, &next);
6256
}
6257
}
6258
/* not reached */
6259
}
6260
6261
static enum XML_Error PTRCALL
6262
epilogProcessor(XML_Parser parser, const char *s, const char *end,
6263
const char **nextPtr) {
6264
parser->m_processor = epilogProcessor;
6265
parser->m_eventPtr = s;
6266
for (;;) {
6267
const char *next = NULL;
6268
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
6269
#if XML_GE == 1
6270
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
6271
XML_ACCOUNT_DIRECT)) {
6272
accountingOnAbort(parser);
6273
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6274
}
6275
#endif
6276
parser->m_eventEndPtr = next;
6277
switch (tok) {
6278
/* report partial linebreak - it might be the last token */
6279
case -XML_TOK_PROLOG_S:
6280
if (parser->m_defaultHandler) {
6281
reportDefault(parser, parser->m_encoding, s, next);
6282
if (parser->m_parsingStatus.parsing == XML_FINISHED)
6283
return XML_ERROR_ABORTED;
6284
}
6285
*nextPtr = next;
6286
return XML_ERROR_NONE;
6287
case XML_TOK_NONE:
6288
*nextPtr = s;
6289
return XML_ERROR_NONE;
6290
case XML_TOK_PROLOG_S:
6291
if (parser->m_defaultHandler)
6292
reportDefault(parser, parser->m_encoding, s, next);
6293
break;
6294
case XML_TOK_PI:
6295
if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
6296
return XML_ERROR_NO_MEMORY;
6297
break;
6298
case XML_TOK_COMMENT:
6299
if (! reportComment(parser, parser->m_encoding, s, next))
6300
return XML_ERROR_NO_MEMORY;
6301
break;
6302
case XML_TOK_INVALID:
6303
parser->m_eventPtr = next;
6304
return XML_ERROR_INVALID_TOKEN;
6305
case XML_TOK_PARTIAL:
6306
if (! parser->m_parsingStatus.finalBuffer) {
6307
*nextPtr = s;
6308
return XML_ERROR_NONE;
6309
}
6310
return XML_ERROR_UNCLOSED_TOKEN;
6311
case XML_TOK_PARTIAL_CHAR:
6312
if (! parser->m_parsingStatus.finalBuffer) {
6313
*nextPtr = s;
6314
return XML_ERROR_NONE;
6315
}
6316
return XML_ERROR_PARTIAL_CHAR;
6317
default:
6318
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
6319
}
6320
switch (parser->m_parsingStatus.parsing) {
6321
case XML_SUSPENDED:
6322
parser->m_eventPtr = next;
6323
*nextPtr = next;
6324
return XML_ERROR_NONE;
6325
case XML_FINISHED:
6326
parser->m_eventPtr = next;
6327
return XML_ERROR_ABORTED;
6328
case XML_PARSING:
6329
if (parser->m_reenter) {
6330
return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
6331
}
6332
/* Fall through */
6333
default:;
6334
parser->m_eventPtr = s = next;
6335
}
6336
}
6337
}
6338
6339
static enum XML_Error
6340
processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
6341
enum EntityType type) {
6342
OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
6343
switch (type) {
6344
case ENTITY_INTERNAL:
6345
parser->m_processor = internalEntityProcessor;
6346
openEntityList = &parser->m_openInternalEntities;
6347
freeEntityList = &parser->m_freeInternalEntities;
6348
break;
6349
case ENTITY_ATTRIBUTE:
6350
openEntityList = &parser->m_openAttributeEntities;
6351
freeEntityList = &parser->m_freeAttributeEntities;
6352
break;
6353
case ENTITY_VALUE:
6354
openEntityList = &parser->m_openValueEntities;
6355
freeEntityList = &parser->m_freeValueEntities;
6356
break;
6357
/* default case serves merely as a safety net in case of a
6358
* wrong entityType. Therefore we exclude the following lines
6359
* from the test coverage.
6360
*
6361
* LCOV_EXCL_START
6362
*/
6363
default:
6364
// Should not reach here
6365
assert(0);
6366
/* LCOV_EXCL_STOP */
6367
}
6368
6369
if (*freeEntityList) {
6370
openEntity = *freeEntityList;
6371
*freeEntityList = openEntity->next;
6372
} else {
6373
openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6374
if (! openEntity)
6375
return XML_ERROR_NO_MEMORY;
6376
}
6377
entity->open = XML_TRUE;
6378
entity->hasMore = XML_TRUE;
6379
#if XML_GE == 1
6380
entityTrackingOnOpen(parser, entity, __LINE__);
6381
#endif
6382
entity->processed = 0;
6383
openEntity->next = *openEntityList;
6384
*openEntityList = openEntity;
6385
openEntity->entity = entity;
6386
openEntity->type = type;
6387
openEntity->startTagLevel = parser->m_tagLevel;
6388
openEntity->betweenDecl = betweenDecl;
6389
openEntity->internalEventPtr = NULL;
6390
openEntity->internalEventEndPtr = NULL;
6391
6392
// Only internal entities make use of the reenter flag
6393
// therefore no need to set it for other entity types
6394
if (type == ENTITY_INTERNAL) {
6395
triggerReenter(parser);
6396
}
6397
return XML_ERROR_NONE;
6398
}
6399
6400
static enum XML_Error PTRCALL
6401
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6402
const char **nextPtr) {
6403
UNUSED_P(s);
6404
UNUSED_P(end);
6405
UNUSED_P(nextPtr);
6406
ENTITY *entity;
6407
const char *textStart, *textEnd;
6408
const char *next;
6409
enum XML_Error result;
6410
OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6411
if (! openEntity)
6412
return XML_ERROR_UNEXPECTED_STATE;
6413
6414
entity = openEntity->entity;
6415
6416
// This will return early
6417
if (entity->hasMore) {
6418
textStart = ((const char *)entity->textPtr) + entity->processed;
6419
textEnd = (const char *)(entity->textPtr + entity->textLen);
6420
/* Set a safe default value in case 'next' does not get set */
6421
next = textStart;
6422
6423
if (entity->is_param) {
6424
int tok
6425
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6426
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6427
tok, next, &next, XML_FALSE, XML_FALSE,
6428
XML_ACCOUNT_ENTITY_EXPANSION);
6429
} else {
6430
result = doContent(parser, openEntity->startTagLevel,
6431
parser->m_internalEncoding, textStart, textEnd, &next,
6432
XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6433
}
6434
6435
if (result != XML_ERROR_NONE)
6436
return result;
6437
// Check if entity is complete, if not, mark down how much of it is
6438
// processed
6439
if (textEnd != next
6440
&& (parser->m_parsingStatus.parsing == XML_SUSPENDED
6441
|| (parser->m_parsingStatus.parsing == XML_PARSING
6442
&& parser->m_reenter))) {
6443
entity->processed = (int)(next - (const char *)entity->textPtr);
6444
return result;
6445
}
6446
6447
// Entity is complete. We cannot close it here since we need to first
6448
// process its possible inner entities (which are added to the
6449
// m_openInternalEntities during doProlog or doContent calls above)
6450
entity->hasMore = XML_FALSE;
6451
if (! entity->is_param
6452
&& (openEntity->startTagLevel != parser->m_tagLevel)) {
6453
return XML_ERROR_ASYNC_ENTITY;
6454
}
6455
triggerReenter(parser);
6456
return result;
6457
} // End of entity processing, "if" block will return here
6458
6459
// Remove fully processed openEntity from open entity list.
6460
#if XML_GE == 1
6461
entityTrackingOnClose(parser, entity, __LINE__);
6462
#endif
6463
// openEntity is m_openInternalEntities' head, as we set it at the start of
6464
// this function and we skipped doProlog and doContent calls with hasMore set
6465
// to false. This means we can directly remove the head of
6466
// m_openInternalEntities
6467
assert(parser->m_openInternalEntities == openEntity);
6468
entity->open = XML_FALSE;
6469
parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6470
6471
/* put openEntity back in list of free instances */
6472
openEntity->next = parser->m_freeInternalEntities;
6473
parser->m_freeInternalEntities = openEntity;
6474
6475
if (parser->m_openInternalEntities == NULL) {
6476
parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6477
}
6478
triggerReenter(parser);
6479
return XML_ERROR_NONE;
6480
}
6481
6482
static enum XML_Error PTRCALL
6483
errorProcessor(XML_Parser parser, const char *s, const char *end,
6484
const char **nextPtr) {
6485
UNUSED_P(s);
6486
UNUSED_P(end);
6487
UNUSED_P(nextPtr);
6488
return parser->m_errorCode;
6489
}
6490
6491
static enum XML_Error
6492
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6493
const char *ptr, const char *end, STRING_POOL *pool,
6494
enum XML_Account account) {
6495
const char *next = ptr;
6496
enum XML_Error result = XML_ERROR_NONE;
6497
6498
while (1) {
6499
if (! parser->m_openAttributeEntities) {
6500
result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6501
account, &next);
6502
} else {
6503
OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6504
if (! openEntity)
6505
return XML_ERROR_UNEXPECTED_STATE;
6506
6507
ENTITY *const entity = openEntity->entity;
6508
const char *const textStart
6509
= ((const char *)entity->textPtr) + entity->processed;
6510
const char *const textEnd
6511
= (const char *)(entity->textPtr + entity->textLen);
6512
/* Set a safe default value in case 'next' does not get set */
6513
const char *nextInEntity = textStart;
6514
if (entity->hasMore) {
6515
result = appendAttributeValue(
6516
parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6517
pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6518
if (result != XML_ERROR_NONE)
6519
break;
6520
// Check if entity is complete, if not, mark down how much of it is
6521
// processed. A XML_SUSPENDED check here is not required as
6522
// appendAttributeValue will never suspend the parser.
6523
if (textEnd != nextInEntity) {
6524
entity->processed
6525
= (int)(nextInEntity - (const char *)entity->textPtr);
6526
continue;
6527
}
6528
6529
// Entity is complete. We cannot close it here since we need to first
6530
// process its possible inner entities (which are added to the
6531
// m_openAttributeEntities during appendAttributeValue)
6532
entity->hasMore = XML_FALSE;
6533
continue;
6534
} // End of entity processing, "if" block skips the rest
6535
6536
// Remove fully processed openEntity from open entity list.
6537
#if XML_GE == 1
6538
entityTrackingOnClose(parser, entity, __LINE__);
6539
#endif
6540
// openEntity is m_openAttributeEntities' head, since we set it at the
6541
// start of this function and because we skipped appendAttributeValue call
6542
// with hasMore set to false. This means we can directly remove the head
6543
// of m_openAttributeEntities
6544
assert(parser->m_openAttributeEntities == openEntity);
6545
entity->open = XML_FALSE;
6546
parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6547
6548
/* put openEntity back in list of free instances */
6549
openEntity->next = parser->m_freeAttributeEntities;
6550
parser->m_freeAttributeEntities = openEntity;
6551
}
6552
6553
// Break if an error occurred or there is nothing left to process
6554
if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6555
break;
6556
}
6557
}
6558
6559
if (result)
6560
return result;
6561
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6562
poolChop(pool);
6563
if (! poolAppendChar(pool, XML_T('\0')))
6564
return XML_ERROR_NO_MEMORY;
6565
return XML_ERROR_NONE;
6566
}
6567
6568
static enum XML_Error
6569
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6570
const char *ptr, const char *end, STRING_POOL *pool,
6571
enum XML_Account account, const char **nextPtr) {
6572
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6573
#ifndef XML_DTD
6574
UNUSED_P(account);
6575
#endif
6576
6577
for (;;) {
6578
const char *next
6579
= ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6580
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6581
#if XML_GE == 1
6582
if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6583
accountingOnAbort(parser);
6584
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6585
}
6586
#endif
6587
switch (tok) {
6588
case XML_TOK_NONE:
6589
if (nextPtr) {
6590
*nextPtr = next;
6591
}
6592
return XML_ERROR_NONE;
6593
case XML_TOK_INVALID:
6594
if (enc == parser->m_encoding)
6595
parser->m_eventPtr = next;
6596
return XML_ERROR_INVALID_TOKEN;
6597
case XML_TOK_PARTIAL:
6598
if (enc == parser->m_encoding)
6599
parser->m_eventPtr = ptr;
6600
return XML_ERROR_INVALID_TOKEN;
6601
case XML_TOK_CHAR_REF: {
6602
XML_Char buf[XML_ENCODE_MAX];
6603
int i;
6604
int n = XmlCharRefNumber(enc, ptr);
6605
if (n < 0) {
6606
if (enc == parser->m_encoding)
6607
parser->m_eventPtr = ptr;
6608
return XML_ERROR_BAD_CHAR_REF;
6609
}
6610
if (! isCdata && n == 0x20 /* space */
6611
&& (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6612
break;
6613
n = XmlEncode(n, (ICHAR *)buf);
6614
/* The XmlEncode() functions can never return 0 here. That
6615
* error return happens if the code point passed in is either
6616
* negative or greater than or equal to 0x110000. The
6617
* XmlCharRefNumber() functions will all return a number
6618
* strictly less than 0x110000 or a negative value if an error
6619
* occurred. The negative value is intercepted above, so
6620
* XmlEncode() is never passed a value it might return an
6621
* error for.
6622
*/
6623
for (i = 0; i < n; i++) {
6624
if (! poolAppendChar(pool, buf[i]))
6625
return XML_ERROR_NO_MEMORY;
6626
}
6627
} break;
6628
case XML_TOK_DATA_CHARS:
6629
if (! poolAppend(pool, enc, ptr, next))
6630
return XML_ERROR_NO_MEMORY;
6631
break;
6632
case XML_TOK_TRAILING_CR:
6633
next = ptr + enc->minBytesPerChar;
6634
/* fall through */
6635
case XML_TOK_ATTRIBUTE_VALUE_S:
6636
case XML_TOK_DATA_NEWLINE:
6637
if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6638
break;
6639
if (! poolAppendChar(pool, 0x20))
6640
return XML_ERROR_NO_MEMORY;
6641
break;
6642
case XML_TOK_ENTITY_REF: {
6643
const XML_Char *name;
6644
ENTITY *entity;
6645
bool checkEntityDecl;
6646
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6647
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6648
if (ch) {
6649
#if XML_GE == 1
6650
/* NOTE: We are replacing 4-6 characters original input for 1 character
6651
* so there is no amplification and hence recording without
6652
* protection. */
6653
accountingDiffTolerated(parser, tok, (char *)&ch,
6654
((char *)&ch) + sizeof(XML_Char), __LINE__,
6655
XML_ACCOUNT_ENTITY_EXPANSION);
6656
#endif /* XML_GE == 1 */
6657
if (! poolAppendChar(pool, ch))
6658
return XML_ERROR_NO_MEMORY;
6659
break;
6660
}
6661
name = poolStoreString(&parser->m_temp2Pool, enc,
6662
ptr + enc->minBytesPerChar,
6663
next - enc->minBytesPerChar);
6664
if (! name)
6665
return XML_ERROR_NO_MEMORY;
6666
entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6667
poolDiscard(&parser->m_temp2Pool);
6668
/* First, determine if a check for an existing declaration is needed;
6669
if yes, check that the entity exists, and that it is internal.
6670
*/
6671
if (pool == &dtd->pool) /* are we called from prolog? */
6672
checkEntityDecl =
6673
#ifdef XML_DTD
6674
parser->m_prologState.documentEntity &&
6675
#endif /* XML_DTD */
6676
(dtd->standalone ? ! parser->m_openInternalEntities
6677
: ! dtd->hasParamEntityRefs);
6678
else /* if (pool == &parser->m_tempPool): we are called from content */
6679
checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6680
if (checkEntityDecl) {
6681
if (! entity)
6682
return XML_ERROR_UNDEFINED_ENTITY;
6683
else if (! entity->is_internal)
6684
return XML_ERROR_ENTITY_DECLARED_IN_PE;
6685
} else if (! entity) {
6686
/* Cannot report skipped entity here - see comments on
6687
parser->m_skippedEntityHandler.
6688
if (parser->m_skippedEntityHandler)
6689
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6690
*/
6691
/* Cannot call the default handler because this would be
6692
out of sync with the call to the startElementHandler.
6693
if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6694
reportDefault(parser, enc, ptr, next);
6695
*/
6696
break;
6697
}
6698
if (entity->open) {
6699
if (enc == parser->m_encoding) {
6700
/* It does not appear that this line can be executed.
6701
*
6702
* The "if (entity->open)" check catches recursive entity
6703
* definitions. In order to be called with an open
6704
* entity, it must have gone through this code before and
6705
* been through the recursive call to
6706
* appendAttributeValue() some lines below. That call
6707
* sets the local encoding ("enc") to the parser's
6708
* internal encoding (internal_utf8 or internal_utf16),
6709
* which can never be the same as the principle encoding.
6710
* It doesn't appear there is another code path that gets
6711
* here with entity->open being TRUE.
6712
*
6713
* Since it is not certain that this logic is watertight,
6714
* we keep the line and merely exclude it from coverage
6715
* tests.
6716
*/
6717
parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6718
}
6719
return XML_ERROR_RECURSIVE_ENTITY_REF;
6720
}
6721
if (entity->notation) {
6722
if (enc == parser->m_encoding)
6723
parser->m_eventPtr = ptr;
6724
return XML_ERROR_BINARY_ENTITY_REF;
6725
}
6726
if (! entity->textPtr) {
6727
if (enc == parser->m_encoding)
6728
parser->m_eventPtr = ptr;
6729
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6730
} else {
6731
enum XML_Error result;
6732
result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6733
if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6734
*nextPtr = next;
6735
}
6736
return result;
6737
}
6738
} break;
6739
default:
6740
/* The only token returned by XmlAttributeValueTok() that does
6741
* not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6742
* Getting that would require an entity name to contain an
6743
* incomplete XML character (e.g. \xE2\x82); however previous
6744
* tokenisers will have already recognised and rejected such
6745
* names before XmlAttributeValueTok() gets a look-in. This
6746
* default case should be retained as a safety net, but the code
6747
* excluded from coverage tests.
6748
*
6749
* LCOV_EXCL_START
6750
*/
6751
if (enc == parser->m_encoding)
6752
parser->m_eventPtr = ptr;
6753
return XML_ERROR_UNEXPECTED_STATE;
6754
/* LCOV_EXCL_STOP */
6755
}
6756
ptr = next;
6757
}
6758
/* not reached */
6759
}
6760
6761
#if XML_GE == 1
6762
static enum XML_Error
6763
storeEntityValue(XML_Parser parser, const ENCODING *enc,
6764
const char *entityTextPtr, const char *entityTextEnd,
6765
enum XML_Account account, const char **nextPtr) {
6766
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6767
STRING_POOL *pool = &(dtd->entityValuePool);
6768
enum XML_Error result = XML_ERROR_NONE;
6769
# ifdef XML_DTD
6770
int oldInEntityValue = parser->m_prologState.inEntityValue;
6771
parser->m_prologState.inEntityValue = 1;
6772
# else
6773
UNUSED_P(account);
6774
# endif /* XML_DTD */
6775
/* never return Null for the value argument in EntityDeclHandler,
6776
since this would indicate an external entity; therefore we
6777
have to make sure that entityValuePool.start is not null */
6778
if (! pool->blocks) {
6779
if (! poolGrow(pool))
6780
return XML_ERROR_NO_MEMORY;
6781
}
6782
6783
const char *next;
6784
for (;;) {
6785
next
6786
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6787
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6788
6789
if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6790
account)) {
6791
accountingOnAbort(parser);
6792
result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6793
goto endEntityValue;
6794
}
6795
6796
switch (tok) {
6797
case XML_TOK_PARAM_ENTITY_REF:
6798
# ifdef XML_DTD
6799
if (parser->m_isParamEntity || enc != parser->m_encoding) {
6800
const XML_Char *name;
6801
ENTITY *entity;
6802
name = poolStoreString(&parser->m_tempPool, enc,
6803
entityTextPtr + enc->minBytesPerChar,
6804
next - enc->minBytesPerChar);
6805
if (! name) {
6806
result = XML_ERROR_NO_MEMORY;
6807
goto endEntityValue;
6808
}
6809
entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6810
poolDiscard(&parser->m_tempPool);
6811
if (! entity) {
6812
/* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6813
/* cannot report skipped entity here - see comments on
6814
parser->m_skippedEntityHandler
6815
if (parser->m_skippedEntityHandler)
6816
parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6817
*/
6818
dtd->keepProcessing = dtd->standalone;
6819
goto endEntityValue;
6820
}
6821
if (entity->open || (entity == parser->m_declEntity)) {
6822
if (enc == parser->m_encoding)
6823
parser->m_eventPtr = entityTextPtr;
6824
result = XML_ERROR_RECURSIVE_ENTITY_REF;
6825
goto endEntityValue;
6826
}
6827
if (entity->systemId) {
6828
if (parser->m_externalEntityRefHandler) {
6829
dtd->paramEntityRead = XML_FALSE;
6830
entity->open = XML_TRUE;
6831
entityTrackingOnOpen(parser, entity, __LINE__);
6832
if (! parser->m_externalEntityRefHandler(
6833
parser->m_externalEntityRefHandlerArg, 0, entity->base,
6834
entity->systemId, entity->publicId)) {
6835
entityTrackingOnClose(parser, entity, __LINE__);
6836
entity->open = XML_FALSE;
6837
result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6838
goto endEntityValue;
6839
}
6840
entityTrackingOnClose(parser, entity, __LINE__);
6841
entity->open = XML_FALSE;
6842
if (! dtd->paramEntityRead)
6843
dtd->keepProcessing = dtd->standalone;
6844
} else
6845
dtd->keepProcessing = dtd->standalone;
6846
} else {
6847
result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6848
goto endEntityValue;
6849
}
6850
break;
6851
}
6852
# endif /* XML_DTD */
6853
/* In the internal subset, PE references are not legal
6854
within markup declarations, e.g entity values in this case. */
6855
parser->m_eventPtr = entityTextPtr;
6856
result = XML_ERROR_PARAM_ENTITY_REF;
6857
goto endEntityValue;
6858
case XML_TOK_NONE:
6859
result = XML_ERROR_NONE;
6860
goto endEntityValue;
6861
case XML_TOK_ENTITY_REF:
6862
case XML_TOK_DATA_CHARS:
6863
if (! poolAppend(pool, enc, entityTextPtr, next)) {
6864
result = XML_ERROR_NO_MEMORY;
6865
goto endEntityValue;
6866
}
6867
break;
6868
case XML_TOK_TRAILING_CR:
6869
next = entityTextPtr + enc->minBytesPerChar;
6870
/* fall through */
6871
case XML_TOK_DATA_NEWLINE:
6872
if (pool->end == pool->ptr && ! poolGrow(pool)) {
6873
result = XML_ERROR_NO_MEMORY;
6874
goto endEntityValue;
6875
}
6876
*(pool->ptr)++ = 0xA;
6877
break;
6878
case XML_TOK_CHAR_REF: {
6879
XML_Char buf[XML_ENCODE_MAX];
6880
int i;
6881
int n = XmlCharRefNumber(enc, entityTextPtr);
6882
if (n < 0) {
6883
if (enc == parser->m_encoding)
6884
parser->m_eventPtr = entityTextPtr;
6885
result = XML_ERROR_BAD_CHAR_REF;
6886
goto endEntityValue;
6887
}
6888
n = XmlEncode(n, (ICHAR *)buf);
6889
/* The XmlEncode() functions can never return 0 here. That
6890
* error return happens if the code point passed in is either
6891
* negative or greater than or equal to 0x110000. The
6892
* XmlCharRefNumber() functions will all return a number
6893
* strictly less than 0x110000 or a negative value if an error
6894
* occurred. The negative value is intercepted above, so
6895
* XmlEncode() is never passed a value it might return an
6896
* error for.
6897
*/
6898
for (i = 0; i < n; i++) {
6899
if (pool->end == pool->ptr && ! poolGrow(pool)) {
6900
result = XML_ERROR_NO_MEMORY;
6901
goto endEntityValue;
6902
}
6903
*(pool->ptr)++ = buf[i];
6904
}
6905
} break;
6906
case XML_TOK_PARTIAL:
6907
if (enc == parser->m_encoding)
6908
parser->m_eventPtr = entityTextPtr;
6909
result = XML_ERROR_INVALID_TOKEN;
6910
goto endEntityValue;
6911
case XML_TOK_INVALID:
6912
if (enc == parser->m_encoding)
6913
parser->m_eventPtr = next;
6914
result = XML_ERROR_INVALID_TOKEN;
6915
goto endEntityValue;
6916
default:
6917
/* This default case should be unnecessary -- all the tokens
6918
* that XmlEntityValueTok() can return have their own explicit
6919
* cases -- but should be retained for safety. We do however
6920
* exclude it from the coverage statistics.
6921
*
6922
* LCOV_EXCL_START
6923
*/
6924
if (enc == parser->m_encoding)
6925
parser->m_eventPtr = entityTextPtr;
6926
result = XML_ERROR_UNEXPECTED_STATE;
6927
goto endEntityValue;
6928
/* LCOV_EXCL_STOP */
6929
}
6930
entityTextPtr = next;
6931
}
6932
endEntityValue:
6933
# ifdef XML_DTD
6934
parser->m_prologState.inEntityValue = oldInEntityValue;
6935
# endif /* XML_DTD */
6936
// If 'nextPtr' is given, it should be updated during the processing
6937
if (nextPtr != NULL) {
6938
*nextPtr = next;
6939
}
6940
return result;
6941
}
6942
6943
static enum XML_Error
6944
callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6945
const char *entityTextPtr, const char *entityTextEnd,
6946
enum XML_Account account) {
6947
const char *next = entityTextPtr;
6948
enum XML_Error result = XML_ERROR_NONE;
6949
while (1) {
6950
if (! parser->m_openValueEntities) {
6951
result
6952
= storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6953
} else {
6954
OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6955
if (! openEntity)
6956
return XML_ERROR_UNEXPECTED_STATE;
6957
6958
ENTITY *const entity = openEntity->entity;
6959
const char *const textStart
6960
= ((const char *)entity->textPtr) + entity->processed;
6961
const char *const textEnd
6962
= (const char *)(entity->textPtr + entity->textLen);
6963
/* Set a safe default value in case 'next' does not get set */
6964
const char *nextInEntity = textStart;
6965
if (entity->hasMore) {
6966
result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6967
textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6968
&nextInEntity);
6969
if (result != XML_ERROR_NONE)
6970
break;
6971
// Check if entity is complete, if not, mark down how much of it is
6972
// processed. A XML_SUSPENDED check here is not required as
6973
// appendAttributeValue will never suspend the parser.
6974
if (textEnd != nextInEntity) {
6975
entity->processed
6976
= (int)(nextInEntity - (const char *)entity->textPtr);
6977
continue;
6978
}
6979
6980
// Entity is complete. We cannot close it here since we need to first
6981
// process its possible inner entities (which are added to the
6982
// m_openValueEntities during storeEntityValue)
6983
entity->hasMore = XML_FALSE;
6984
continue;
6985
} // End of entity processing, "if" block skips the rest
6986
6987
// Remove fully processed openEntity from open entity list.
6988
# if XML_GE == 1
6989
entityTrackingOnClose(parser, entity, __LINE__);
6990
# endif
6991
// openEntity is m_openValueEntities' head, since we set it at the
6992
// start of this function and because we skipped storeEntityValue call
6993
// with hasMore set to false. This means we can directly remove the head
6994
// of m_openValueEntities
6995
assert(parser->m_openValueEntities == openEntity);
6996
entity->open = XML_FALSE;
6997
parser->m_openValueEntities = parser->m_openValueEntities->next;
6998
6999
/* put openEntity back in list of free instances */
7000
openEntity->next = parser->m_freeValueEntities;
7001
parser->m_freeValueEntities = openEntity;
7002
}
7003
7004
// Break if an error occurred or there is nothing left to process
7005
if (result
7006
|| (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
7007
break;
7008
}
7009
}
7010
7011
return result;
7012
}
7013
7014
#else /* XML_GE == 0 */
7015
7016
static enum XML_Error
7017
storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
7018
// This will store "&amp;entity123;" in entity->textPtr
7019
// to end up as "&entity123;" in the handler.
7020
const char *const entity_start = "&amp;";
7021
const char *const entity_end = ";";
7022
7023
STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
7024
if (! poolAppendString(pool, entity_start)
7025
|| ! poolAppendString(pool, entity->name)
7026
|| ! poolAppendString(pool, entity_end)) {
7027
poolDiscard(pool);
7028
return XML_ERROR_NO_MEMORY;
7029
}
7030
7031
entity->textPtr = poolStart(pool);
7032
entity->textLen = (int)(poolLength(pool));
7033
poolFinish(pool);
7034
7035
return XML_ERROR_NONE;
7036
}
7037
7038
#endif /* XML_GE == 0 */
7039
7040
static void FASTCALL
7041
normalizeLines(XML_Char *s) {
7042
XML_Char *p;
7043
for (;; s++) {
7044
if (*s == XML_T('\0'))
7045
return;
7046
if (*s == 0xD)
7047
break;
7048
}
7049
p = s;
7050
do {
7051
if (*s == 0xD) {
7052
*p++ = 0xA;
7053
if (*++s == 0xA)
7054
s++;
7055
} else
7056
*p++ = *s++;
7057
} while (*s);
7058
*p = XML_T('\0');
7059
}
7060
7061
static int
7062
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
7063
const char *start, const char *end) {
7064
const XML_Char *target;
7065
XML_Char *data;
7066
const char *tem;
7067
if (! parser->m_processingInstructionHandler) {
7068
if (parser->m_defaultHandler)
7069
reportDefault(parser, enc, start, end);
7070
return 1;
7071
}
7072
start += enc->minBytesPerChar * 2;
7073
tem = start + XmlNameLength(enc, start);
7074
target = poolStoreString(&parser->m_tempPool, enc, start, tem);
7075
if (! target)
7076
return 0;
7077
poolFinish(&parser->m_tempPool);
7078
data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
7079
end - enc->minBytesPerChar * 2);
7080
if (! data)
7081
return 0;
7082
normalizeLines(data);
7083
parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
7084
poolClear(&parser->m_tempPool);
7085
return 1;
7086
}
7087
7088
static int
7089
reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
7090
const char *end) {
7091
XML_Char *data;
7092
if (! parser->m_commentHandler) {
7093
if (parser->m_defaultHandler)
7094
reportDefault(parser, enc, start, end);
7095
return 1;
7096
}
7097
data = poolStoreString(&parser->m_tempPool, enc,
7098
start + enc->minBytesPerChar * 4,
7099
end - enc->minBytesPerChar * 3);
7100
if (! data)
7101
return 0;
7102
normalizeLines(data);
7103
parser->m_commentHandler(parser->m_handlerArg, data);
7104
poolClear(&parser->m_tempPool);
7105
return 1;
7106
}
7107
7108
static void
7109
reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
7110
const char *end) {
7111
if (MUST_CONVERT(enc, s)) {
7112
enum XML_Convert_Result convert_res;
7113
const char **eventPP;
7114
const char **eventEndPP;
7115
if (enc == parser->m_encoding) {
7116
eventPP = &parser->m_eventPtr;
7117
eventEndPP = &parser->m_eventEndPtr;
7118
} else {
7119
/* To get here, two things must be true; the parser must be
7120
* using a character encoding that is not the same as the
7121
* encoding passed in, and the encoding passed in must need
7122
* conversion to the internal format (UTF-8 unless XML_UNICODE
7123
* is defined). The only occasions on which the encoding passed
7124
* in is not the same as the parser's encoding are when it is
7125
* the internal encoding (e.g. a previously defined parameter
7126
* entity, already converted to internal format). This by
7127
* definition doesn't need conversion, so the whole branch never
7128
* gets executed.
7129
*
7130
* For safety's sake we don't delete these lines and merely
7131
* exclude them from coverage statistics.
7132
*
7133
* LCOV_EXCL_START
7134
*/
7135
eventPP = &(parser->m_openInternalEntities->internalEventPtr);
7136
eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
7137
/* LCOV_EXCL_STOP */
7138
}
7139
do {
7140
ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
7141
convert_res
7142
= XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
7143
*eventEndPP = s;
7144
parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
7145
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
7146
*eventPP = s;
7147
} while ((convert_res != XML_CONVERT_COMPLETED)
7148
&& (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
7149
} else
7150
parser->m_defaultHandler(
7151
parser->m_handlerArg, (const XML_Char *)s,
7152
(int)((const XML_Char *)end - (const XML_Char *)s));
7153
}
7154
7155
static int
7156
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
7157
XML_Bool isId, const XML_Char *value, XML_Parser parser) {
7158
DEFAULT_ATTRIBUTE *att;
7159
if (value || isId) {
7160
/* The handling of default attributes gets messed up if we have
7161
a default which duplicates a non-default. */
7162
int i;
7163
for (i = 0; i < type->nDefaultAtts; i++)
7164
if (attId == type->defaultAtts[i].id)
7165
return 1;
7166
if (isId && ! type->idAtt && ! attId->xmlns)
7167
type->idAtt = attId;
7168
}
7169
if (type->nDefaultAtts == type->allocDefaultAtts) {
7170
if (type->allocDefaultAtts == 0) {
7171
type->allocDefaultAtts = 8;
7172
type->defaultAtts
7173
= MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7174
if (! type->defaultAtts) {
7175
type->allocDefaultAtts = 0;
7176
return 0;
7177
}
7178
} else {
7179
DEFAULT_ATTRIBUTE *temp;
7180
7181
/* Detect and prevent integer overflow */
7182
if (type->allocDefaultAtts > INT_MAX / 2) {
7183
return 0;
7184
}
7185
7186
int count = type->allocDefaultAtts * 2;
7187
7188
/* Detect and prevent integer overflow.
7189
* The preprocessor guard addresses the "always false" warning
7190
* from -Wtype-limits on platforms where
7191
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7192
#if UINT_MAX >= SIZE_MAX
7193
if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
7194
return 0;
7195
}
7196
#endif
7197
7198
temp = REALLOC(parser, type->defaultAtts,
7199
(count * sizeof(DEFAULT_ATTRIBUTE)));
7200
if (temp == NULL)
7201
return 0;
7202
type->allocDefaultAtts = count;
7203
type->defaultAtts = temp;
7204
}
7205
}
7206
att = type->defaultAtts + type->nDefaultAtts;
7207
att->id = attId;
7208
att->value = value;
7209
att->isCdata = isCdata;
7210
if (! isCdata)
7211
attId->maybeTokenized = XML_TRUE;
7212
type->nDefaultAtts += 1;
7213
return 1;
7214
}
7215
7216
static int
7217
setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
7218
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7219
const XML_Char *name;
7220
for (name = elementType->name; *name; name++) {
7221
if (*name == XML_T(ASCII_COLON)) {
7222
PREFIX *prefix;
7223
const XML_Char *s;
7224
for (s = elementType->name; s != name; s++) {
7225
if (! poolAppendChar(&dtd->pool, *s))
7226
return 0;
7227
}
7228
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7229
return 0;
7230
prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
7231
sizeof(PREFIX));
7232
if (! prefix)
7233
return 0;
7234
if (prefix->name == poolStart(&dtd->pool))
7235
poolFinish(&dtd->pool);
7236
else
7237
poolDiscard(&dtd->pool);
7238
elementType->prefix = prefix;
7239
break;
7240
}
7241
}
7242
return 1;
7243
}
7244
7245
static ATTRIBUTE_ID *
7246
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
7247
const char *end) {
7248
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7249
ATTRIBUTE_ID *id;
7250
const XML_Char *name;
7251
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7252
return NULL;
7253
name = poolStoreString(&dtd->pool, enc, start, end);
7254
if (! name)
7255
return NULL;
7256
/* skip quotation mark - its storage will be reused (like in name[-1]) */
7257
++name;
7258
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
7259
sizeof(ATTRIBUTE_ID));
7260
if (! id)
7261
return NULL;
7262
if (id->name != name)
7263
poolDiscard(&dtd->pool);
7264
else {
7265
poolFinish(&dtd->pool);
7266
if (! parser->m_ns)
7267
;
7268
else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
7269
&& name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
7270
&& name[4] == XML_T(ASCII_s)
7271
&& (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
7272
if (name[5] == XML_T('\0'))
7273
id->prefix = &dtd->defaultPrefix;
7274
else
7275
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
7276
sizeof(PREFIX));
7277
id->xmlns = XML_TRUE;
7278
} else {
7279
int i;
7280
for (i = 0; name[i]; i++) {
7281
/* attributes without prefix are *not* in the default namespace */
7282
if (name[i] == XML_T(ASCII_COLON)) {
7283
int j;
7284
for (j = 0; j < i; j++) {
7285
if (! poolAppendChar(&dtd->pool, name[j]))
7286
return NULL;
7287
}
7288
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7289
return NULL;
7290
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
7291
poolStart(&dtd->pool), sizeof(PREFIX));
7292
if (! id->prefix)
7293
return NULL;
7294
if (id->prefix->name == poolStart(&dtd->pool))
7295
poolFinish(&dtd->pool);
7296
else
7297
poolDiscard(&dtd->pool);
7298
break;
7299
}
7300
}
7301
}
7302
}
7303
return id;
7304
}
7305
7306
/* Separator character that getContext() appends to the temp pool between
   entries of the context string it builds. */
#define CONTEXT_SEP XML_T(ASCII_FF)
7307
7308
static const XML_Char *
7309
getContext(XML_Parser parser) {
7310
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7311
HASH_TABLE_ITER iter;
7312
XML_Bool needSep = XML_FALSE;
7313
7314
if (dtd->defaultPrefix.binding) {
7315
int i;
7316
int len;
7317
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7318
return NULL;
7319
len = dtd->defaultPrefix.binding->uriLen;
7320
if (parser->m_namespaceSeparator)
7321
len--;
7322
for (i = 0; i < len; i++) {
7323
if (! poolAppendChar(&parser->m_tempPool,
7324
dtd->defaultPrefix.binding->uri[i])) {
7325
/* Because of memory caching, I don't believe this line can be
7326
* executed.
7327
*
7328
* This is part of a loop copying the default prefix binding
7329
* URI into the parser's temporary string pool. Previously,
7330
* that URI was copied into the same string pool, with a
7331
* terminating NUL character, as part of setContext(). When
7332
* the pool was cleared, that leaves a block definitely big
7333
* enough to hold the URI on the free block list of the pool.
7334
* The URI copy in getContext() therefore cannot run out of
7335
* memory.
7336
*
7337
* If the pool is used between the setContext() and
7338
* getContext() calls, the worst it can do is leave a bigger
7339
* block on the front of the free list. Given that this is
7340
* all somewhat inobvious and program logic can be changed, we
7341
* don't delete the line but we do exclude it from the test
7342
* coverage statistics.
7343
*/
7344
return NULL; /* LCOV_EXCL_LINE */
7345
}
7346
}
7347
needSep = XML_TRUE;
7348
}
7349
7350
hashTableIterInit(&iter, &(dtd->prefixes));
7351
for (;;) {
7352
int i;
7353
int len;
7354
const XML_Char *s;
7355
PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
7356
if (! prefix)
7357
break;
7358
if (! prefix->binding) {
7359
/* This test appears to be (justifiable) paranoia. There does
7360
* not seem to be a way of injecting a prefix without a binding
7361
* that doesn't get errored long before this function is called.
7362
* The test should remain for safety's sake, so we instead
7363
* exclude the following line from the coverage statistics.
7364
*/
7365
continue; /* LCOV_EXCL_LINE */
7366
}
7367
if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7368
return NULL;
7369
for (s = prefix->name; *s; s++)
7370
if (! poolAppendChar(&parser->m_tempPool, *s))
7371
return NULL;
7372
if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7373
return NULL;
7374
len = prefix->binding->uriLen;
7375
if (parser->m_namespaceSeparator)
7376
len--;
7377
for (i = 0; i < len; i++)
7378
if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7379
return NULL;
7380
needSep = XML_TRUE;
7381
}
7382
7383
hashTableIterInit(&iter, &(dtd->generalEntities));
7384
for (;;) {
7385
const XML_Char *s;
7386
ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7387
if (! e)
7388
break;
7389
if (! e->open)
7390
continue;
7391
if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7392
return NULL;
7393
for (s = e->name; *s; s++)
7394
if (! poolAppendChar(&parser->m_tempPool, *s))
7395
return 0;
7396
needSep = XML_TRUE;
7397
}
7398
7399
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7400
return NULL;
7401
return parser->m_tempPool.start;
7402
}
7403
7404
static XML_Bool
7405
setContext(XML_Parser parser, const XML_Char *context) {
7406
if (context == NULL) {
7407
return XML_FALSE;
7408
}
7409
7410
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7411
const XML_Char *s = context;
7412
7413
while (*context != XML_T('\0')) {
7414
if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7415
ENTITY *e;
7416
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7417
return XML_FALSE;
7418
e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7419
poolStart(&parser->m_tempPool), 0);
7420
if (e)
7421
e->open = XML_TRUE;
7422
if (*s != XML_T('\0'))
7423
s++;
7424
context = s;
7425
poolDiscard(&parser->m_tempPool);
7426
} else if (*s == XML_T(ASCII_EQUALS)) {
7427
PREFIX *prefix;
7428
if (poolLength(&parser->m_tempPool) == 0)
7429
prefix = &dtd->defaultPrefix;
7430
else {
7431
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7432
return XML_FALSE;
7433
prefix
7434
= (PREFIX *)lookup(parser, &dtd->prefixes,
7435
poolStart(&parser->m_tempPool), sizeof(PREFIX));
7436
if (! prefix)
7437
return XML_FALSE;
7438
if (prefix->name == poolStart(&parser->m_tempPool)) {
7439
prefix->name = poolCopyString(&dtd->pool, prefix->name);
7440
if (! prefix->name)
7441
return XML_FALSE;
7442
}
7443
poolDiscard(&parser->m_tempPool);
7444
}
7445
for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7446
context++)
7447
if (! poolAppendChar(&parser->m_tempPool, *context))
7448
return XML_FALSE;
7449
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7450
return XML_FALSE;
7451
if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7452
&parser->m_inheritedBindings)
7453
!= XML_ERROR_NONE)
7454
return XML_FALSE;
7455
poolDiscard(&parser->m_tempPool);
7456
if (*context != XML_T('\0'))
7457
++context;
7458
s = context;
7459
} else {
7460
if (! poolAppendChar(&parser->m_tempPool, *s))
7461
return XML_FALSE;
7462
s++;
7463
}
7464
}
7465
return XML_TRUE;
7466
}
7467
7468
static void FASTCALL
7469
normalizePublicId(XML_Char *publicId) {
7470
XML_Char *p = publicId;
7471
XML_Char *s;
7472
for (s = publicId; *s; s++) {
7473
switch (*s) {
7474
case 0x20:
7475
case 0xD:
7476
case 0xA:
7477
if (p != publicId && p[-1] != 0x20)
7478
*p++ = 0x20;
7479
break;
7480
default:
7481
*p++ = *s;
7482
}
7483
}
7484
if (p != publicId && p[-1] == 0x20)
7485
--p;
7486
*p = XML_T('\0');
7487
}
7488
7489
static DTD *
7490
dtdCreate(XML_Parser parser) {
7491
DTD *p = MALLOC(parser, sizeof(DTD));
7492
if (p == NULL)
7493
return p;
7494
poolInit(&(p->pool), parser);
7495
poolInit(&(p->entityValuePool), parser);
7496
hashTableInit(&(p->generalEntities), parser);
7497
hashTableInit(&(p->elementTypes), parser);
7498
hashTableInit(&(p->attributeIds), parser);
7499
hashTableInit(&(p->prefixes), parser);
7500
#ifdef XML_DTD
7501
p->paramEntityRead = XML_FALSE;
7502
hashTableInit(&(p->paramEntities), parser);
7503
#endif /* XML_DTD */
7504
p->defaultPrefix.name = NULL;
7505
p->defaultPrefix.binding = NULL;
7506
7507
p->in_eldecl = XML_FALSE;
7508
p->scaffIndex = NULL;
7509
p->scaffold = NULL;
7510
p->scaffLevel = 0;
7511
p->scaffSize = 0;
7512
p->scaffCount = 0;
7513
p->contentStringLen = 0;
7514
7515
p->keepProcessing = XML_TRUE;
7516
p->hasParamEntityRefs = XML_FALSE;
7517
p->standalone = XML_FALSE;
7518
return p;
7519
}
7520
7521
static void
7522
dtdReset(DTD *p, XML_Parser parser) {
7523
HASH_TABLE_ITER iter;
7524
hashTableIterInit(&iter, &(p->elementTypes));
7525
for (;;) {
7526
ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7527
if (! e)
7528
break;
7529
if (e->allocDefaultAtts != 0)
7530
FREE(parser, e->defaultAtts);
7531
}
7532
hashTableClear(&(p->generalEntities));
7533
#ifdef XML_DTD
7534
p->paramEntityRead = XML_FALSE;
7535
hashTableClear(&(p->paramEntities));
7536
#endif /* XML_DTD */
7537
hashTableClear(&(p->elementTypes));
7538
hashTableClear(&(p->attributeIds));
7539
hashTableClear(&(p->prefixes));
7540
poolClear(&(p->pool));
7541
poolClear(&(p->entityValuePool));
7542
p->defaultPrefix.name = NULL;
7543
p->defaultPrefix.binding = NULL;
7544
7545
p->in_eldecl = XML_FALSE;
7546
7547
FREE(parser, p->scaffIndex);
7548
p->scaffIndex = NULL;
7549
FREE(parser, p->scaffold);
7550
p->scaffold = NULL;
7551
7552
p->scaffLevel = 0;
7553
p->scaffSize = 0;
7554
p->scaffCount = 0;
7555
p->contentStringLen = 0;
7556
7557
p->keepProcessing = XML_TRUE;
7558
p->hasParamEntityRefs = XML_FALSE;
7559
p->standalone = XML_FALSE;
7560
}
7561
7562
static void
7563
dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) {
7564
HASH_TABLE_ITER iter;
7565
hashTableIterInit(&iter, &(p->elementTypes));
7566
for (;;) {
7567
ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7568
if (! e)
7569
break;
7570
if (e->allocDefaultAtts != 0)
7571
FREE(parser, e->defaultAtts);
7572
}
7573
hashTableDestroy(&(p->generalEntities));
7574
#ifdef XML_DTD
7575
hashTableDestroy(&(p->paramEntities));
7576
#endif /* XML_DTD */
7577
hashTableDestroy(&(p->elementTypes));
7578
hashTableDestroy(&(p->attributeIds));
7579
hashTableDestroy(&(p->prefixes));
7580
poolDestroy(&(p->pool));
7581
poolDestroy(&(p->entityValuePool));
7582
if (isDocEntity) {
7583
FREE(parser, p->scaffIndex);
7584
FREE(parser, p->scaffold);
7585
}
7586
FREE(parser, p);
7587
}
7588
7589
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7590
The new DTD has already been initialized.
7591
*/
7592
static int
7593
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7594
XML_Parser parser) {
7595
HASH_TABLE_ITER iter;
7596
7597
/* Copy the prefix table. */
7598
7599
hashTableIterInit(&iter, &(oldDtd->prefixes));
7600
for (;;) {
7601
const XML_Char *name;
7602
const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7603
if (! oldP)
7604
break;
7605
name = poolCopyString(&(newDtd->pool), oldP->name);
7606
if (! name)
7607
return 0;
7608
if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7609
return 0;
7610
}
7611
7612
hashTableIterInit(&iter, &(oldDtd->attributeIds));
7613
7614
/* Copy the attribute id table. */
7615
7616
for (;;) {
7617
ATTRIBUTE_ID *newA;
7618
const XML_Char *name;
7619
const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7620
7621
if (! oldA)
7622
break;
7623
/* Remember to allocate the scratch byte before the name. */
7624
if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7625
return 0;
7626
name = poolCopyString(&(newDtd->pool), oldA->name);
7627
if (! name)
7628
return 0;
7629
++name;
7630
newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7631
sizeof(ATTRIBUTE_ID));
7632
if (! newA)
7633
return 0;
7634
newA->maybeTokenized = oldA->maybeTokenized;
7635
if (oldA->prefix) {
7636
newA->xmlns = oldA->xmlns;
7637
if (oldA->prefix == &oldDtd->defaultPrefix)
7638
newA->prefix = &newDtd->defaultPrefix;
7639
else
7640
newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7641
oldA->prefix->name, 0);
7642
}
7643
}
7644
7645
/* Copy the element type table. */
7646
7647
hashTableIterInit(&iter, &(oldDtd->elementTypes));
7648
7649
for (;;) {
7650
int i;
7651
ELEMENT_TYPE *newE;
7652
const XML_Char *name;
7653
const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7654
if (! oldE)
7655
break;
7656
name = poolCopyString(&(newDtd->pool), oldE->name);
7657
if (! name)
7658
return 0;
7659
newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7660
sizeof(ELEMENT_TYPE));
7661
if (! newE)
7662
return 0;
7663
if (oldE->nDefaultAtts) {
7664
/* Detect and prevent integer overflow.
7665
* The preprocessor guard addresses the "always false" warning
7666
* from -Wtype-limits on platforms where
7667
* sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7668
#if UINT_MAX >= SIZE_MAX
7669
if ((size_t)oldE->nDefaultAtts
7670
> ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7671
return 0;
7672
}
7673
#endif
7674
newE->defaultAtts
7675
= MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7676
if (! newE->defaultAtts) {
7677
return 0;
7678
}
7679
}
7680
if (oldE->idAtt)
7681
newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7682
oldE->idAtt->name, 0);
7683
newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7684
if (oldE->prefix)
7685
newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7686
oldE->prefix->name, 0);
7687
for (i = 0; i < newE->nDefaultAtts; i++) {
7688
newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7689
oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7690
newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7691
if (oldE->defaultAtts[i].value) {
7692
newE->defaultAtts[i].value
7693
= poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7694
if (! newE->defaultAtts[i].value)
7695
return 0;
7696
} else
7697
newE->defaultAtts[i].value = NULL;
7698
}
7699
}
7700
7701
/* Copy the entity tables. */
7702
if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7703
&(oldDtd->generalEntities)))
7704
return 0;
7705
7706
#ifdef XML_DTD
7707
if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7708
&(oldDtd->paramEntities)))
7709
return 0;
7710
newDtd->paramEntityRead = oldDtd->paramEntityRead;
7711
#endif /* XML_DTD */
7712
7713
newDtd->keepProcessing = oldDtd->keepProcessing;
7714
newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7715
newDtd->standalone = oldDtd->standalone;
7716
7717
/* Don't want deep copying for scaffolding */
7718
newDtd->in_eldecl = oldDtd->in_eldecl;
7719
newDtd->scaffold = oldDtd->scaffold;
7720
newDtd->contentStringLen = oldDtd->contentStringLen;
7721
newDtd->scaffSize = oldDtd->scaffSize;
7722
newDtd->scaffLevel = oldDtd->scaffLevel;
7723
newDtd->scaffIndex = oldDtd->scaffIndex;
7724
7725
return 1;
7726
} /* End dtdCopy */
7727
7728
static int
7729
copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7730
STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7731
HASH_TABLE_ITER iter;
7732
const XML_Char *cachedOldBase = NULL;
7733
const XML_Char *cachedNewBase = NULL;
7734
7735
hashTableIterInit(&iter, oldTable);
7736
7737
for (;;) {
7738
ENTITY *newE;
7739
const XML_Char *name;
7740
const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7741
if (! oldE)
7742
break;
7743
name = poolCopyString(newPool, oldE->name);
7744
if (! name)
7745
return 0;
7746
newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7747
if (! newE)
7748
return 0;
7749
if (oldE->systemId) {
7750
const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7751
if (! tem)
7752
return 0;
7753
newE->systemId = tem;
7754
if (oldE->base) {
7755
if (oldE->base == cachedOldBase)
7756
newE->base = cachedNewBase;
7757
else {
7758
cachedOldBase = oldE->base;
7759
tem = poolCopyString(newPool, cachedOldBase);
7760
if (! tem)
7761
return 0;
7762
cachedNewBase = newE->base = tem;
7763
}
7764
}
7765
if (oldE->publicId) {
7766
tem = poolCopyString(newPool, oldE->publicId);
7767
if (! tem)
7768
return 0;
7769
newE->publicId = tem;
7770
}
7771
} else {
7772
const XML_Char *tem
7773
= poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7774
if (! tem)
7775
return 0;
7776
newE->textPtr = tem;
7777
newE->textLen = oldE->textLen;
7778
}
7779
if (oldE->notation) {
7780
const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7781
if (! tem)
7782
return 0;
7783
newE->notation = tem;
7784
}
7785
newE->is_param = oldE->is_param;
7786
newE->is_internal = oldE->is_internal;
7787
}
7788
return 1;
7789
}
7790
7791
/* Initial hash table size exponent: lookup() creates a table with
 * (size_t)1 << INIT_POWER slots on first insertion. */
#define INIT_POWER 6
7792
7793
static XML_Bool FASTCALL
7794
keyeq(KEY s1, KEY s2) {
7795
for (; *s1 == *s2; s1++, s2++)
7796
if (*s1 == 0)
7797
return XML_TRUE;
7798
return XML_FALSE;
7799
}
7800
7801
static size_t
7802
keylen(KEY s) {
7803
size_t len = 0;
7804
for (; *s; s++, len++)
7805
;
7806
return len;
7807
}
7808
7809
static void
7810
copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7811
key->k[0] = 0;
7812
key->k[1] = get_hash_secret_salt(parser);
7813
}
7814
7815
static unsigned long FASTCALL
7816
hash(XML_Parser parser, KEY s) {
7817
struct siphash state;
7818
struct sipkey key;
7819
(void)sip24_valid;
7820
copy_salt_to_sipkey(parser, &key);
7821
sip24_init(&state, &key);
7822
sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7823
return (unsigned long)sip24_final(&state);
7824
}
7825
7826
static NAMED *
7827
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7828
size_t i;
7829
if (table->size == 0) {
7830
size_t tsize;
7831
if (! createSize)
7832
return NULL;
7833
table->power = INIT_POWER;
7834
/* table->size is a power of 2 */
7835
table->size = (size_t)1 << INIT_POWER;
7836
tsize = table->size * sizeof(NAMED *);
7837
table->v = MALLOC(table->parser, tsize);
7838
if (! table->v) {
7839
table->size = 0;
7840
return NULL;
7841
}
7842
memset(table->v, 0, tsize);
7843
i = hash(parser, name) & ((unsigned long)table->size - 1);
7844
} else {
7845
unsigned long h = hash(parser, name);
7846
unsigned long mask = (unsigned long)table->size - 1;
7847
unsigned char step = 0;
7848
i = h & mask;
7849
while (table->v[i]) {
7850
if (keyeq(name, table->v[i]->name))
7851
return table->v[i];
7852
if (! step)
7853
step = PROBE_STEP(h, mask, table->power);
7854
i < step ? (i += table->size - step) : (i -= step);
7855
}
7856
if (! createSize)
7857
return NULL;
7858
7859
/* check for overflow (table is half full) */
7860
if (table->used >> (table->power - 1)) {
7861
unsigned char newPower = table->power + 1;
7862
7863
/* Detect and prevent invalid shift */
7864
if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7865
return NULL;
7866
}
7867
7868
size_t newSize = (size_t)1 << newPower;
7869
unsigned long newMask = (unsigned long)newSize - 1;
7870
7871
/* Detect and prevent integer overflow */
7872
if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7873
return NULL;
7874
}
7875
7876
size_t tsize = newSize * sizeof(NAMED *);
7877
NAMED **newV = MALLOC(table->parser, tsize);
7878
if (! newV)
7879
return NULL;
7880
memset(newV, 0, tsize);
7881
for (i = 0; i < table->size; i++)
7882
if (table->v[i]) {
7883
unsigned long newHash = hash(parser, table->v[i]->name);
7884
size_t j = newHash & newMask;
7885
step = 0;
7886
while (newV[j]) {
7887
if (! step)
7888
step = PROBE_STEP(newHash, newMask, newPower);
7889
j < step ? (j += newSize - step) : (j -= step);
7890
}
7891
newV[j] = table->v[i];
7892
}
7893
FREE(table->parser, table->v);
7894
table->v = newV;
7895
table->power = newPower;
7896
table->size = newSize;
7897
i = h & newMask;
7898
step = 0;
7899
while (table->v[i]) {
7900
if (! step)
7901
step = PROBE_STEP(h, newMask, newPower);
7902
i < step ? (i += newSize - step) : (i -= step);
7903
}
7904
}
7905
}
7906
table->v[i] = MALLOC(table->parser, createSize);
7907
if (! table->v[i])
7908
return NULL;
7909
memset(table->v[i], 0, createSize);
7910
table->v[i]->name = name;
7911
(table->used)++;
7912
return table->v[i];
7913
}
7914
7915
static void FASTCALL
7916
hashTableClear(HASH_TABLE *table) {
7917
size_t i;
7918
for (i = 0; i < table->size; i++) {
7919
FREE(table->parser, table->v[i]);
7920
table->v[i] = NULL;
7921
}
7922
table->used = 0;
7923
}
7924
7925
static void FASTCALL
7926
hashTableDestroy(HASH_TABLE *table) {
7927
size_t i;
7928
for (i = 0; i < table->size; i++)
7929
FREE(table->parser, table->v[i]);
7930
FREE(table->parser, table->v);
7931
}
7932
7933
static void FASTCALL
7934
hashTableInit(HASH_TABLE *p, XML_Parser parser) {
7935
p->power = 0;
7936
p->size = 0;
7937
p->used = 0;
7938
p->v = NULL;
7939
p->parser = parser;
7940
}
7941
7942
static void FASTCALL
7943
hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7944
iter->p = table->v;
7945
iter->end = iter->p ? iter->p + table->size : NULL;
7946
}
7947
7948
static NAMED *FASTCALL
7949
hashTableIterNext(HASH_TABLE_ITER *iter) {
7950
while (iter->p != iter->end) {
7951
NAMED *tem = *(iter->p)++;
7952
if (tem)
7953
return tem;
7954
}
7955
return NULL;
7956
}
7957
7958
static void FASTCALL
7959
poolInit(STRING_POOL *pool, XML_Parser parser) {
7960
pool->blocks = NULL;
7961
pool->freeBlocks = NULL;
7962
pool->start = NULL;
7963
pool->ptr = NULL;
7964
pool->end = NULL;
7965
pool->parser = parser;
7966
}
7967
7968
static void FASTCALL
7969
poolClear(STRING_POOL *pool) {
7970
if (! pool->freeBlocks)
7971
pool->freeBlocks = pool->blocks;
7972
else {
7973
BLOCK *p = pool->blocks;
7974
while (p) {
7975
BLOCK *tem = p->next;
7976
p->next = pool->freeBlocks;
7977
pool->freeBlocks = p;
7978
p = tem;
7979
}
7980
}
7981
pool->blocks = NULL;
7982
pool->start = NULL;
7983
pool->ptr = NULL;
7984
pool->end = NULL;
7985
}
7986
7987
static void FASTCALL
7988
poolDestroy(STRING_POOL *pool) {
7989
BLOCK *p = pool->blocks;
7990
while (p) {
7991
BLOCK *tem = p->next;
7992
FREE(pool->parser, p);
7993
p = tem;
7994
}
7995
p = pool->freeBlocks;
7996
while (p) {
7997
BLOCK *tem = p->next;
7998
FREE(pool->parser, p);
7999
p = tem;
8000
}
8001
}
8002
8003
static XML_Char *
8004
poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8005
const char *end) {
8006
if (! pool->ptr && ! poolGrow(pool))
8007
return NULL;
8008
for (;;) {
8009
const enum XML_Convert_Result convert_res = XmlConvert(
8010
enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
8011
if ((convert_res == XML_CONVERT_COMPLETED)
8012
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
8013
break;
8014
if (! poolGrow(pool))
8015
return NULL;
8016
}
8017
return pool->start;
8018
}
8019
8020
static const XML_Char *FASTCALL
8021
poolCopyString(STRING_POOL *pool, const XML_Char *s) {
8022
do {
8023
if (! poolAppendChar(pool, *s))
8024
return NULL;
8025
} while (*s++);
8026
s = pool->start;
8027
poolFinish(pool);
8028
return s;
8029
}
8030
8031
static const XML_Char *
8032
poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
8033
if (! pool->ptr && ! poolGrow(pool)) {
8034
/* The following line is unreachable given the current usage of
8035
* poolCopyStringN(). Currently it is called from exactly one
8036
* place to copy the text of a simple general entity. By that
8037
* point, the name of the entity is already stored in the pool, so
8038
* pool->ptr cannot be NULL.
8039
*
8040
* If poolCopyStringN() is used elsewhere as it well might be,
8041
* this line may well become executable again. Regardless, this
8042
* sort of check shouldn't be removed lightly, so we just exclude
8043
* it from the coverage statistics.
8044
*/
8045
return NULL; /* LCOV_EXCL_LINE */
8046
}
8047
for (; n > 0; --n, s++) {
8048
if (! poolAppendChar(pool, *s))
8049
return NULL;
8050
}
8051
s = pool->start;
8052
poolFinish(pool);
8053
return s;
8054
}
8055
8056
static const XML_Char *FASTCALL
8057
poolAppendString(STRING_POOL *pool, const XML_Char *s) {
8058
while (*s) {
8059
if (! poolAppendChar(pool, *s))
8060
return NULL;
8061
s++;
8062
}
8063
return pool->start;
8064
}
8065
8066
static XML_Char *
8067
poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8068
const char *end) {
8069
if (! poolAppend(pool, enc, ptr, end))
8070
return NULL;
8071
if (pool->ptr == pool->end && ! poolGrow(pool))
8072
return NULL;
8073
*(pool->ptr)++ = 0;
8074
return pool->start;
8075
}
8076
8077
static size_t
8078
poolBytesToAllocateFor(int blockSize) {
8079
/* Unprotected math would be:
8080
** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
8081
**
8082
** Detect overflow, avoiding _signed_ overflow undefined behavior
8083
** For a + b * c we check b * c in isolation first, so that addition of a
8084
** on top has no chance of making us accept a small non-negative number
8085
*/
8086
const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
8087
8088
if (blockSize <= 0)
8089
return 0;
8090
8091
if (blockSize > (int)(INT_MAX / stretch))
8092
return 0;
8093
8094
{
8095
const int stretchedBlockSize = blockSize * (int)stretch;
8096
const int bytesToAllocate
8097
= (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
8098
if (bytesToAllocate < 0)
8099
return 0;
8100
8101
return (size_t)bytesToAllocate;
8102
}
8103
}
8104
8105
static XML_Bool FASTCALL
8106
poolGrow(STRING_POOL *pool) {
8107
if (pool->freeBlocks) {
8108
if (pool->start == 0) {
8109
pool->blocks = pool->freeBlocks;
8110
pool->freeBlocks = pool->freeBlocks->next;
8111
pool->blocks->next = NULL;
8112
pool->start = pool->blocks->s;
8113
pool->end = pool->start + pool->blocks->size;
8114
pool->ptr = pool->start;
8115
return XML_TRUE;
8116
}
8117
if (pool->end - pool->start < pool->freeBlocks->size) {
8118
BLOCK *tem = pool->freeBlocks->next;
8119
pool->freeBlocks->next = pool->blocks;
8120
pool->blocks = pool->freeBlocks;
8121
pool->freeBlocks = tem;
8122
memcpy(pool->blocks->s, pool->start,
8123
(pool->end - pool->start) * sizeof(XML_Char));
8124
pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
8125
pool->start = pool->blocks->s;
8126
pool->end = pool->start + pool->blocks->size;
8127
return XML_TRUE;
8128
}
8129
}
8130
if (pool->blocks && pool->start == pool->blocks->s) {
8131
BLOCK *temp;
8132
int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
8133
size_t bytesToAllocate;
8134
8135
/* NOTE: Needs to be calculated prior to calling `realloc`
8136
to avoid dangling pointers: */
8137
const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
8138
8139
if (blockSize < 0) {
8140
/* This condition traps a situation where either more than
8141
* INT_MAX/2 bytes have already been allocated. This isn't
8142
* readily testable, since it is unlikely that an average
8143
* machine will have that much memory, so we exclude it from the
8144
* coverage statistics.
8145
*/
8146
return XML_FALSE; /* LCOV_EXCL_LINE */
8147
}
8148
8149
bytesToAllocate = poolBytesToAllocateFor(blockSize);
8150
if (bytesToAllocate == 0)
8151
return XML_FALSE;
8152
8153
temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate);
8154
if (temp == NULL)
8155
return XML_FALSE;
8156
pool->blocks = temp;
8157
pool->blocks->size = blockSize;
8158
pool->ptr = pool->blocks->s + offsetInsideBlock;
8159
pool->start = pool->blocks->s;
8160
pool->end = pool->start + blockSize;
8161
} else {
8162
BLOCK *tem;
8163
int blockSize = (int)(pool->end - pool->start);
8164
size_t bytesToAllocate;
8165
8166
if (blockSize < 0) {
8167
/* This condition traps a situation where either more than
8168
* INT_MAX bytes have already been allocated (which is prevented
8169
* by various pieces of program logic, not least this one, never
8170
* mind the unlikelihood of actually having that much memory) or
8171
* the pool control fields have been corrupted (which could
8172
* conceivably happen in an extremely buggy user handler
8173
* function). Either way it isn't readily testable, so we
8174
* exclude it from the coverage statistics.
8175
*/
8176
return XML_FALSE; /* LCOV_EXCL_LINE */
8177
}
8178
8179
if (blockSize < INIT_BLOCK_SIZE)
8180
blockSize = INIT_BLOCK_SIZE;
8181
else {
8182
/* Detect overflow, avoiding _signed_ overflow undefined behavior */
8183
if ((int)((unsigned)blockSize * 2U) < 0) {
8184
return XML_FALSE;
8185
}
8186
blockSize *= 2;
8187
}
8188
8189
bytesToAllocate = poolBytesToAllocateFor(blockSize);
8190
if (bytesToAllocate == 0)
8191
return XML_FALSE;
8192
8193
tem = MALLOC(pool->parser, bytesToAllocate);
8194
if (! tem)
8195
return XML_FALSE;
8196
tem->size = blockSize;
8197
tem->next = pool->blocks;
8198
pool->blocks = tem;
8199
if (pool->ptr != pool->start)
8200
memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
8201
pool->ptr = tem->s + (pool->ptr - pool->start);
8202
pool->start = tem->s;
8203
pool->end = tem->s + blockSize;
8204
}
8205
return XML_TRUE;
8206
}
8207
8208
static int FASTCALL
8209
nextScaffoldPart(XML_Parser parser) {
8210
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8211
CONTENT_SCAFFOLD *me;
8212
int next;
8213
8214
if (! dtd->scaffIndex) {
8215
/* Detect and prevent integer overflow.
8216
* The preprocessor guard addresses the "always false" warning
8217
* from -Wtype-limits on platforms where
8218
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8219
#if UINT_MAX >= SIZE_MAX
8220
if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
8221
return -1;
8222
}
8223
#endif
8224
dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int));
8225
if (! dtd->scaffIndex)
8226
return -1;
8227
dtd->scaffIndex[0] = 0;
8228
}
8229
8230
// Will casting to int be safe further down?
8231
if (dtd->scaffCount > INT_MAX) {
8232
return -1;
8233
}
8234
8235
if (dtd->scaffCount >= dtd->scaffSize) {
8236
CONTENT_SCAFFOLD *temp;
8237
if (dtd->scaffold) {
8238
/* Detect and prevent integer overflow */
8239
if (dtd->scaffSize > UINT_MAX / 2u) {
8240
return -1;
8241
}
8242
/* Detect and prevent integer overflow.
8243
* The preprocessor guard addresses the "always false" warning
8244
* from -Wtype-limits on platforms where
8245
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8246
#if UINT_MAX >= SIZE_MAX
8247
if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
8248
return -1;
8249
}
8250
#endif
8251
8252
temp = REALLOC(parser, dtd->scaffold,
8253
dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
8254
if (temp == NULL)
8255
return -1;
8256
dtd->scaffSize *= 2;
8257
} else {
8258
temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD));
8259
if (temp == NULL)
8260
return -1;
8261
dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
8262
}
8263
dtd->scaffold = temp;
8264
}
8265
next = (int)dtd->scaffCount++;
8266
me = &dtd->scaffold[next];
8267
if (dtd->scaffLevel) {
8268
CONTENT_SCAFFOLD *parent
8269
= &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
8270
if (parent->lastchild) {
8271
dtd->scaffold[parent->lastchild].nextsib = next;
8272
}
8273
if (! parent->childcnt)
8274
parent->firstchild = next;
8275
parent->lastchild = next;
8276
parent->childcnt++;
8277
}
8278
me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
8279
return next;
8280
}
8281
8282
static XML_Content *
8283
build_model(XML_Parser parser) {
8284
/* Function build_model transforms the existing parser->m_dtd->scaffold
8285
* array of CONTENT_SCAFFOLD tree nodes into a new array of
8286
* XML_Content tree nodes followed by a gapless list of zero-terminated
8287
* strings. */
8288
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8289
XML_Content *ret;
8290
XML_Char *str; /* the current string writing location */
8291
8292
/* Detect and prevent integer overflow.
8293
* The preprocessor guard addresses the "always false" warning
8294
* from -Wtype-limits on platforms where
8295
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8296
#if UINT_MAX >= SIZE_MAX
8297
if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
8298
return NULL;
8299
}
8300
if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
8301
return NULL;
8302
}
8303
#endif
8304
if (dtd->scaffCount * sizeof(XML_Content)
8305
> (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
8306
return NULL;
8307
}
8308
8309
const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
8310
+ (dtd->contentStringLen * sizeof(XML_Char)));
8311
8312
// NOTE: We are avoiding MALLOC(..) here to so that
8313
// applications that are not using XML_FreeContentModel but plain
8314
// free(..) or .free_fcn() to free the content model's memory are safe.
8315
ret = parser->m_mem.malloc_fcn(allocsize);
8316
if (! ret)
8317
return NULL;
8318
8319
/* What follows is an iterative implementation (of what was previously done
8320
* recursively in a dedicated function called "build_node". The old recursive
8321
* build_node could be forced into stack exhaustion from input as small as a
8322
* few megabyte, and so that was a security issue. Hence, a function call
8323
* stack is avoided now by resolving recursion.)
8324
*
8325
* The iterative approach works as follows:
8326
*
8327
* - We have two writing pointers, both walking up the result array; one does
8328
* the work, the other creates "jobs" for its colleague to do, and leads
8329
* the way:
8330
*
8331
* - The faster one, pointer jobDest, always leads and writes "what job
8332
* to do" by the other, once they reach that place in the
8333
* array: leader "jobDest" stores the source node array index (relative
8334
* to array dtd->scaffold) in field "numchildren".
8335
*
8336
* - The slower one, pointer dest, looks at the value stored in the
8337
* "numchildren" field (which actually holds a source node array index
8338
* at that time) and puts the real data from dtd->scaffold in.
8339
*
8340
* - Before the loop starts, jobDest writes source array index 0
8341
* (where the root node is located) so that dest will have something to do
8342
* when it starts operation.
8343
*
8344
* - Whenever nodes with children are encountered, jobDest appends
8345
* them as new jobs, in order. As a result, tree node siblings are
8346
* adjacent in the resulting array, for example:
8347
*
8348
* [0] root, has two children
8349
* [1] first child of 0, has three children
8350
* [3] first child of 1, does not have children
8351
* [4] second child of 1, does not have children
8352
* [5] third child of 1, does not have children
8353
* [2] second child of 0, does not have children
8354
*
8355
* Or (the same data) presented in flat array view:
8356
*
8357
* [0] root, has two children
8358
*
8359
* [1] first child of 0, has three children
8360
* [2] second child of 0, does not have children
8361
*
8362
* [3] first child of 1, does not have children
8363
* [4] second child of 1, does not have children
8364
* [5] third child of 1, does not have children
8365
*
8366
* - The algorithm repeats until all target array indices have been processed.
8367
*/
8368
XML_Content *dest = ret; /* tree node writing location, moves upwards */
8369
XML_Content *const destLimit = &ret[dtd->scaffCount];
8370
XML_Content *jobDest = ret; /* next free writing location in target array */
8371
str = (XML_Char *)&ret[dtd->scaffCount];
8372
8373
/* Add the starting job, the root node (index 0) of the source tree */
8374
(jobDest++)->numchildren = 0;
8375
8376
for (; dest < destLimit; dest++) {
8377
/* Retrieve source tree array index from job storage */
8378
const int src_node = (int)dest->numchildren;
8379
8380
/* Convert item */
8381
dest->type = dtd->scaffold[src_node].type;
8382
dest->quant = dtd->scaffold[src_node].quant;
8383
if (dest->type == XML_CTYPE_NAME) {
8384
const XML_Char *src;
8385
dest->name = str;
8386
src = dtd->scaffold[src_node].name;
8387
for (;;) {
8388
*str++ = *src;
8389
if (! *src)
8390
break;
8391
src++;
8392
}
8393
dest->numchildren = 0;
8394
dest->children = NULL;
8395
} else {
8396
unsigned int i;
8397
int cn;
8398
dest->name = NULL;
8399
dest->numchildren = dtd->scaffold[src_node].childcnt;
8400
dest->children = jobDest;
8401
8402
/* Append scaffold indices of children to array */
8403
for (i = 0, cn = dtd->scaffold[src_node].firstchild;
8404
i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
8405
(jobDest++)->numchildren = (unsigned int)cn;
8406
}
8407
}
8408
8409
return ret;
8410
}
8411
8412
static ELEMENT_TYPE *
8413
getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8414
const char *end) {
8415
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8416
const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8417
ELEMENT_TYPE *ret;
8418
8419
if (! name)
8420
return NULL;
8421
ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8422
sizeof(ELEMENT_TYPE));
8423
if (! ret)
8424
return NULL;
8425
if (ret->name != name)
8426
poolDiscard(&dtd->pool);
8427
else {
8428
poolFinish(&dtd->pool);
8429
if (! setElementTypePrefix(parser, ret))
8430
return NULL;
8431
}
8432
return ret;
8433
}
8434
8435
static XML_Char *
8436
copyString(const XML_Char *s, XML_Parser parser) {
8437
size_t charsRequired = 0;
8438
XML_Char *result;
8439
8440
/* First determine how long the string is */
8441
while (s[charsRequired] != 0) {
8442
charsRequired++;
8443
}
8444
/* Include the terminator */
8445
charsRequired++;
8446
8447
/* Now allocate space for the copy */
8448
result = MALLOC(parser, charsRequired * sizeof(XML_Char));
8449
if (result == NULL)
8450
return NULL;
8451
/* Copy the original into place */
8452
memcpy(result, s, charsRequired * sizeof(XML_Char));
8453
return result;
8454
}
8455
8456
#if XML_GE == 1
8457
8458
static float
8459
accountingGetCurrentAmplification(XML_Parser rootParser) {
8460
// 1.........1.........12 => 22
8461
const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8462
const XmlBigCount countBytesOutput
8463
= rootParser->m_accounting.countBytesDirect
8464
+ rootParser->m_accounting.countBytesIndirect;
8465
const float amplificationFactor
8466
= rootParser->m_accounting.countBytesDirect
8467
? ((float)countBytesOutput
8468
/ (float)(rootParser->m_accounting.countBytesDirect))
8469
: ((float)(lenOfShortestInclude
8470
+ rootParser->m_accounting.countBytesIndirect)
8471
/ (float)lenOfShortestInclude);
8472
assert(! rootParser->m_parentParser);
8473
return amplificationFactor;
8474
}
8475
8476
static void
8477
accountingReportStats(XML_Parser originParser, const char *epilog) {
8478
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8479
assert(! rootParser->m_parentParser);
8480
8481
if (rootParser->m_accounting.debugLevel == 0u) {
8482
return;
8483
}
8484
8485
const float amplificationFactor
8486
= accountingGetCurrentAmplification(rootParser);
8487
fprintf(stderr,
8488
"expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8489
"10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8490
(void *)rootParser, rootParser->m_accounting.countBytesDirect,
8491
rootParser->m_accounting.countBytesIndirect,
8492
(double)amplificationFactor, epilog);
8493
}
8494
8495
static void
8496
accountingOnAbort(XML_Parser originParser) {
8497
accountingReportStats(originParser, " ABORTING\n");
8498
}
8499
8500
static void
8501
accountingReportDiff(XML_Parser rootParser,
8502
unsigned int levelsAwayFromRootParser, const char *before,
8503
const char *after, ptrdiff_t bytesMore, int source_line,
8504
enum XML_Account account) {
8505
assert(! rootParser->m_parentParser);
8506
8507
fprintf(stderr,
8508
" (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8509
bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8510
levelsAwayFromRootParser, source_line, 10, "");
8511
8512
const char ellipis[] = "[..]";
8513
const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8514
const unsigned int contextLength = 10;
8515
8516
/* Note: Performance is of no concern here */
8517
const char *walker = before;
8518
if ((rootParser->m_accounting.debugLevel >= 3u)
8519
|| (after - before)
8520
<= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8521
for (; walker < after; walker++) {
8522
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8523
}
8524
} else {
8525
for (; walker < before + contextLength; walker++) {
8526
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8527
}
8528
fprintf(stderr, ellipis);
8529
walker = after - contextLength;
8530
for (; walker < after; walker++) {
8531
fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8532
}
8533
}
8534
fprintf(stderr, "\"\n");
8535
}
8536
8537
static XML_Bool
8538
accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8539
const char *after, int source_line,
8540
enum XML_Account account) {
8541
/* Note: We need to check the token type *first* to be sure that
8542
* we can even access variable <after>, safely.
8543
* E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8544
switch (tok) {
8545
case XML_TOK_INVALID:
8546
case XML_TOK_PARTIAL:
8547
case XML_TOK_PARTIAL_CHAR:
8548
case XML_TOK_NONE:
8549
return XML_TRUE;
8550
}
8551
8552
if (account == XML_ACCOUNT_NONE)
8553
return XML_TRUE; /* because these bytes have been accounted for, already */
8554
8555
unsigned int levelsAwayFromRootParser;
8556
const XML_Parser rootParser
8557
= getRootParserOf(originParser, &levelsAwayFromRootParser);
8558
assert(! rootParser->m_parentParser);
8559
8560
const int isDirect
8561
= (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8562
const ptrdiff_t bytesMore = after - before;
8563
8564
XmlBigCount *const additionTarget
8565
= isDirect ? &rootParser->m_accounting.countBytesDirect
8566
: &rootParser->m_accounting.countBytesIndirect;
8567
8568
/* Detect and avoid integer overflow */
8569
if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8570
return XML_FALSE;
8571
*additionTarget += bytesMore;
8572
8573
const XmlBigCount countBytesOutput
8574
= rootParser->m_accounting.countBytesDirect
8575
+ rootParser->m_accounting.countBytesIndirect;
8576
const float amplificationFactor
8577
= accountingGetCurrentAmplification(rootParser);
8578
const XML_Bool tolerated
8579
= (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8580
|| (amplificationFactor
8581
<= rootParser->m_accounting.maximumAmplificationFactor);
8582
8583
if (rootParser->m_accounting.debugLevel >= 2u) {
8584
accountingReportStats(rootParser, "");
8585
accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8586
bytesMore, source_line, account);
8587
}
8588
8589
return tolerated;
8590
}
8591
8592
unsigned long long
8593
testingAccountingGetCountBytesDirect(XML_Parser parser) {
8594
if (! parser)
8595
return 0;
8596
return parser->m_accounting.countBytesDirect;
8597
}
8598
8599
unsigned long long
8600
testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8601
if (! parser)
8602
return 0;
8603
return parser->m_accounting.countBytesIndirect;
8604
}
8605
8606
static void
8607
entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8608
const char *action, int sourceLine) {
8609
assert(! rootParser->m_parentParser);
8610
if (rootParser->m_entity_stats.debugLevel == 0u)
8611
return;
8612
8613
# if defined(XML_UNICODE)
8614
const char *const entityName = "[..]";
8615
# else
8616
const char *const entityName = entity->name;
8617
# endif
8618
8619
fprintf(
8620
stderr,
8621
"expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8622
(void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8623
rootParser->m_entity_stats.currentDepth,
8624
rootParser->m_entity_stats.maximumDepthSeen,
8625
((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8626
entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8627
sourceLine);
8628
}
8629
8630
static void
8631
entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8632
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8633
assert(! rootParser->m_parentParser);
8634
8635
rootParser->m_entity_stats.countEverOpened++;
8636
rootParser->m_entity_stats.currentDepth++;
8637
if (rootParser->m_entity_stats.currentDepth
8638
> rootParser->m_entity_stats.maximumDepthSeen) {
8639
rootParser->m_entity_stats.maximumDepthSeen++;
8640
}
8641
8642
entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8643
}
8644
8645
static void
8646
entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8647
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8648
assert(! rootParser->m_parentParser);
8649
8650
entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8651
rootParser->m_entity_stats.currentDepth--;
8652
}
8653
8654
#endif /* XML_GE == 1 */
8655
8656
static XML_Parser
8657
getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8658
XML_Parser rootParser = parser;
8659
unsigned int stepsTakenUpwards = 0;
8660
while (rootParser->m_parentParser) {
8661
rootParser = rootParser->m_parentParser;
8662
stepsTakenUpwards++;
8663
}
8664
assert(! rootParser->m_parentParser);
8665
if (outLevelDiff != NULL) {
8666
*outLevelDiff = stepsTakenUpwards;
8667
}
8668
return rootParser;
8669
}
8670
8671
#if XML_GE == 1
8672
8673
/* Map a byte value to a printable, zero-terminated string for debug output.
 *
 * - 0 maps to "\0", 9 to "\t", 10 to "\n" and 13 to "\r" (each spelled with
 *   a literal backslash).
 * - Other control bytes, 127 and everything from 128 up map to "\x" followed
 *   by the upper-case hex value without leading zeros (e.g. "\x1", "\x1F").
 * - The double quote maps to backslash + quote, the backslash to two
 *   backslashes.
 * - Every other byte in 32..126 maps to itself.
 *
 * The returned pointer refers to static storage and must not be freed.
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const size_t entryCount = sizeof(printable) / sizeof(printable[0]);

  // LCOV_EXCL_START
  if ((size_t)c >= entryCount) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP

  return printable[c];
}
9196
9197
#endif /* XML_GE == 1 */
9198
9199
/* Read a debug level from the environment variable named variableName.
 *
 * The value is parsed with strtoul() in base 10 and must consist of a
 * number only, with no trailing characters.  defaultDebugLevel is returned
 * when the variable is unset, empty, has trailing characters, or when
 * strtoul() reports an error through errno; in the failure cases errno is
 * reset to 0 before returning.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *parseEnd = NULL;
  errno = 0;
  const unsigned long parsedLevel = strtoul(text, &parseEnd, 10);

  /* Accept only: no conversion error, at least one character consumed,
   * and the whole string consumed */
  const int accepted
      = (errno == 0) && (parseEnd != text) && (parseEnd[0] == '\0');
  if (! accepted) {
    errno = 0;
    return defaultDebugLevel;
  }

  return parsedLevel;
}
9217
9218