CoCalc -- uri.c

GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/uri.c
⁴³⁸⁹ views
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10

11
#define IN_LIBXML
12
#include "libxml.h"
13

14
#include <limits.h>
15
#include <string.h>
16

17
#include <libxml/xmlmemory.h>
18
#include <libxml/uri.h>
19
#include <libxml/xmlerror.h>
20

21
#include "private/error.h"
22

23
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The value is parenthesized so that the macro expands safely inside
 * larger expressions (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value stored in a freshly created URI: no port was parsed. */
#define PORT_EMPTY           0
/* Port marker used when an authority is present but its server is empty. */
#define PORT_EMPTY_SERVER   (-1)
39

40
static void
41
xmlURIErrMemory(const char *extra)
42
{
43
    if (extra)
44
        __xmlRaiseError(NULL, NULL, NULL,
45
                        NULL, NULL, XML_FROM_URI,
46
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
47
                        extra, NULL, NULL, 0, 0,
48
                        "Memory allocation failed : %s\n", extra);
49
    else
50
        __xmlRaiseError(NULL, NULL, NULL,
51
                        NULL, NULL, XML_FROM_URI,
52
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
53
                        NULL, NULL, NULL, 0, 0,
54
                        "Memory allocation failed\n");
55
}
56

57
static void xmlCleanURI(xmlURIPtr uri);
58

59
/*
 * Character classes from RFC 2396, used in the legacy handling code.
 * Macros taking (x) expect a character value; IS_UNWISE and NEXT take
 * a pointer into the string being scanned.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This implementation additionally treats "[" and "]" as unwise.
 * Unlike the macros above, the argument is a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by 3 (the caller must already have validated the two
 * following characters), anything else advances it by 1.
 * @p must be a modifiable pointer lvalue; it is evaluated more than once.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s as a char string via xmlStrndup(). */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
144

145
/************************************************************************
146
 *									*
147
 *                         RFC 3986 parser				*
148
 *									*
149
 ************************************************************************/
150

151
/*
 * RFC 3986 character classes. Every macro takes a pointer to the
 * character to test and may evaluate its argument several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read after the preceding test
 * succeeded, so a NUL terminator is never stepped over. The argument is
 * parenthesized before the offset is added so that compound pointer
 * expressions are handled correctly.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
200

201
/**
202
 * xmlParse3986Scheme:
203
 * @uri:  pointer to an URI structure
204
 * @str:  pointer to the string to analyze
205
 *
206
 * Parse an URI scheme
207
 *
208
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209
 *
210
 * Returns 0 or the error code
211
 */
212
static int
213
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
214
    const char *cur;
215

216
    if (str == NULL)
217
	return(-1);
218

219
    cur = *str;
220
    if (!ISA_ALPHA(cur))
221
	return(2);
222
    cur++;
223
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
224
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
225
    if (uri != NULL) {
226
	if (uri->scheme != NULL) xmlFree(uri->scheme);
227
	uri->scheme = STRNDUP(*str, cur - *str);
228
    }
229
    *str = cur;
230
    return(0);
231
}
232

233
/**
234
 * xmlParse3986Fragment:
235
 * @uri:  pointer to an URI structure
236
 * @str:  pointer to the string to analyze
237
 *
238
 * Parse the query part of an URI
239
 *
240
 * fragment      = *( pchar / "/" / "?" )
241
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
242
 *       in the fragment identifier but this is used very broadly for
243
 *       xpointer scheme selection, so we are allowing it here to not break
244
 *       for example all the DocBook processing chains.
245
 *
246
 * Returns 0 or the error code
247
 */
248
static int
249
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
250
{
251
    const char *cur;
252

253
    if (str == NULL)
254
        return (-1);
255

256
    cur = *str;
257

258
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
259
           (*cur == '[') || (*cur == ']') ||
260
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261
        NEXT(cur);
262
    if (uri != NULL) {
263
        if (uri->fragment != NULL)
264
            xmlFree(uri->fragment);
265
	if (uri->cleanup & 2)
266
	    uri->fragment = STRNDUP(*str, cur - *str);
267
	else
268
	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
269
    }
270
    *str = cur;
271
    return (0);
272
}
273

274
/**
275
 * xmlParse3986Query:
276
 * @uri:  pointer to an URI structure
277
 * @str:  pointer to the string to analyze
278
 *
279
 * Parse the query part of an URI
280
 *
281
 * query = *uric
282
 *
283
 * Returns 0 or the error code
284
 */
285
static int
286
xmlParse3986Query(xmlURIPtr uri, const char **str)
287
{
288
    const char *cur;
289

290
    if (str == NULL)
291
        return (-1);
292

293
    cur = *str;
294

295
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
296
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
297
        NEXT(cur);
298
    if (uri != NULL) {
299
        if (uri->query != NULL)
300
            xmlFree(uri->query);
301
	if (uri->cleanup & 2)
302
	    uri->query = STRNDUP(*str, cur - *str);
303
	else
304
	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
305

306
	/* Save the raw bytes of the query as well.
307
	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
308
	 */
309
	if (uri->query_raw != NULL)
310
	    xmlFree (uri->query_raw);
311
	uri->query_raw = STRNDUP (*str, cur - *str);
312
    }
313
    *str = cur;
314
    return (0);
315
}
316

317
/**
318
 * xmlParse3986Port:
319
 * @uri:  pointer to an URI structure
320
 * @str:  the string to analyze
321
 *
322
 * Parse a port part and fills in the appropriate fields
323
 * of the @uri structure
324
 *
325
 * port          = *DIGIT
326
 *
327
 * Returns 0 or the error code
328
 */
329
static int
330
xmlParse3986Port(xmlURIPtr uri, const char **str)
331
{
332
    const char *cur = *str;
333
    int port = 0;
334

335
    if (ISA_DIGIT(cur)) {
336
	while (ISA_DIGIT(cur)) {
337
            int digit = *cur - '0';
338

339
            if (port > INT_MAX / 10)
340
                return(1);
341
            port *= 10;
342
            if (port > INT_MAX - digit)
343
                return(1);
344
	    port += digit;
345

346
	    cur++;
347
	}
348
	if (uri != NULL)
349
	    uri->port = port;
350
	*str = cur;
351
	return(0);
352
    }
353
    return(1);
354
}
355

356
/**
357
 * xmlParse3986Userinfo:
358
 * @uri:  pointer to an URI structure
359
 * @str:  the string to analyze
360
 *
361
 * Parse an user information part and fills in the appropriate fields
362
 * of the @uri structure
363
 *
364
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
365
 *
366
 * Returns 0 or the error code
367
 */
368
static int
369
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
370
{
371
    const char *cur;
372

373
    cur = *str;
374
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
375
           ISA_SUB_DELIM(cur) || (*cur == ':'))
376
	NEXT(cur);
377
    if (*cur == '@') {
378
	if (uri != NULL) {
379
	    if (uri->user != NULL) xmlFree(uri->user);
380
	    if (uri->cleanup & 2)
381
		uri->user = STRNDUP(*str, cur - *str);
382
	    else
383
		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
384
	}
385
	*str = cur;
386
	return(0);
387
    }
388
    return(1);
389
}
390

391
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; *@str is only
 * updated on success. Each character is read only after the previous
 * one was found to be a digit, so the terminating NUL is never passed.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the upper bound applies to the third digit */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
428
/**
429
 * xmlParse3986Host:
430
 * @uri:  pointer to an URI structure
431
 * @str:  the string to analyze
432
 *
433
 * Parse an host part and fills in the appropriate fields
434
 * of the @uri structure
435
 *
436
 * host          = IP-literal / IPv4address / reg-name
437
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
438
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
439
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
440
 *
441
 * Returns 0 or the error code
442
 */
443
static int
444
xmlParse3986Host(xmlURIPtr uri, const char **str)
445
{
446
    const char *cur = *str;
447
    const char *host;
448

449
    host = cur;
450
    /*
451
     * IPv6 and future addressing scheme are enclosed between brackets
452
     */
453
    if (*cur == '[') {
454
        cur++;
455
	while ((*cur != ']') && (*cur != 0))
456
	    cur++;
457
	if (*cur != ']')
458
	    return(1);
459
	cur++;
460
	goto found;
461
    }
462
    /*
463
     * try to parse an IPv4
464
     */
465
    if (ISA_DIGIT(cur)) {
466
        if (xmlParse3986DecOctet(&cur) != 0)
467
	    goto not_ipv4;
468
	if (*cur != '.')
469
	    goto not_ipv4;
470
	cur++;
471
        if (xmlParse3986DecOctet(&cur) != 0)
472
	    goto not_ipv4;
473
	if (*cur != '.')
474
	    goto not_ipv4;
475
        if (xmlParse3986DecOctet(&cur) != 0)
476
	    goto not_ipv4;
477
	if (*cur != '.')
478
	    goto not_ipv4;
479
        if (xmlParse3986DecOctet(&cur) != 0)
480
	    goto not_ipv4;
481
	goto found;
482
not_ipv4:
483
        cur = *str;
484
    }
485
    /*
486
     * then this should be a hostname which can be empty
487
     */
488
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
489
        NEXT(cur);
490
found:
491
    if (uri != NULL) {
492
	if (uri->authority != NULL) xmlFree(uri->authority);
493
	uri->authority = NULL;
494
	if (uri->server != NULL) xmlFree(uri->server);
495
	if (cur != host) {
496
	    if (uri->cleanup & 2)
497
		uri->server = STRNDUP(host, cur - host);
498
	    else
499
		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
500
	} else
501
	    uri->server = NULL;
502
    }
503
    *str = cur;
504
    return(0);
505
}
506

507
/**
508
 * xmlParse3986Authority:
509
 * @uri:  pointer to an URI structure
510
 * @str:  the string to analyze
511
 *
512
 * Parse an authority part and fills in the appropriate fields
513
 * of the @uri structure
514
 *
515
 * authority     = [ userinfo "@" ] host [ ":" port ]
516
 *
517
 * Returns 0 or the error code
518
 */
519
static int
520
xmlParse3986Authority(xmlURIPtr uri, const char **str)
521
{
522
    const char *cur;
523
    int ret;
524

525
    cur = *str;
526
    /*
527
     * try to parse an userinfo and check for the trailing @
528
     */
529
    ret = xmlParse3986Userinfo(uri, &cur);
530
    if ((ret != 0) || (*cur != '@'))
531
        cur = *str;
532
    else
533
        cur++;
534
    ret = xmlParse3986Host(uri, &cur);
535
    if (ret != 0) return(ret);
536
    if (*cur == ':') {
537
        cur++;
538
        ret = xmlParse3986Port(uri, &cur);
539
	if (ret != 0) return(ret);
540
    }
541
    *str = cur;
542
    return(0);
543
}
544

545
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, advancing *@str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the string does not start with a pchar
 * and @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    if (!ISA_PCHAR(p))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(p))
            break;
        if (*p == forbid)
            break;
        NEXT(p);
    }
    *str = p;
    return (0);
}
577

578
/**
579
 * xmlParse3986PathAbEmpty:
580
 * @uri:  pointer to an URI structure
581
 * @str:  the string to analyze
582
 *
583
 * Parse an path absolute or empty and fills in the appropriate fields
584
 * of the @uri structure
585
 *
586
 * path-abempty  = *( "/" segment )
587
 *
588
 * Returns 0 or the error code
589
 */
590
static int
591
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
592
{
593
    const char *cur;
594
    int ret;
595

596
    cur = *str;
597

598
    while (*cur == '/') {
599
        cur++;
600
	ret = xmlParse3986Segment(&cur, 0, 1);
601
	if (ret != 0) return(ret);
602
    }
603
    if (uri != NULL) {
604
	if (uri->path != NULL) xmlFree(uri->path);
605
        if (*str != cur) {
606
            if (uri->cleanup & 2)
607
                uri->path = STRNDUP(*str, cur - *str);
608
            else
609
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
610
        } else {
611
            uri->path = NULL;
612
        }
613
    }
614
    *str = cur;
615
    return (0);
616
}
617

618
/**
619
 * xmlParse3986PathAbsolute:
620
 * @uri:  pointer to an URI structure
621
 * @str:  the string to analyze
622
 *
623
 * Parse an path absolute and fills in the appropriate fields
624
 * of the @uri structure
625
 *
626
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
627
 *
628
 * Returns 0 or the error code
629
 */
630
static int
631
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
632
{
633
    const char *cur;
634
    int ret;
635

636
    cur = *str;
637

638
    if (*cur != '/')
639
        return(1);
640
    cur++;
641
    ret = xmlParse3986Segment(&cur, 0, 0);
642
    if (ret == 0) {
643
	while (*cur == '/') {
644
	    cur++;
645
	    ret = xmlParse3986Segment(&cur, 0, 1);
646
	    if (ret != 0) return(ret);
647
	}
648
    }
649
    if (uri != NULL) {
650
	if (uri->path != NULL) xmlFree(uri->path);
651
        if (cur != *str) {
652
            if (uri->cleanup & 2)
653
                uri->path = STRNDUP(*str, cur - *str);
654
            else
655
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656
        } else {
657
            uri->path = NULL;
658
        }
659
    }
660
    *str = cur;
661
    return (0);
662
}
663

664
/**
665
 * xmlParse3986PathRootless:
666
 * @uri:  pointer to an URI structure
667
 * @str:  the string to analyze
668
 *
669
 * Parse an path without root and fills in the appropriate fields
670
 * of the @uri structure
671
 *
672
 * path-rootless = segment-nz *( "/" segment )
673
 *
674
 * Returns 0 or the error code
675
 */
676
static int
677
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
678
{
679
    const char *cur;
680
    int ret;
681

682
    cur = *str;
683

684
    ret = xmlParse3986Segment(&cur, 0, 0);
685
    if (ret != 0) return(ret);
686
    while (*cur == '/') {
687
        cur++;
688
	ret = xmlParse3986Segment(&cur, 0, 1);
689
	if (ret != 0) return(ret);
690
    }
691
    if (uri != NULL) {
692
	if (uri->path != NULL) xmlFree(uri->path);
693
        if (cur != *str) {
694
            if (uri->cleanup & 2)
695
                uri->path = STRNDUP(*str, cur - *str);
696
            else
697
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698
        } else {
699
            uri->path = NULL;
700
        }
701
    }
702
    *str = cur;
703
    return (0);
704
}
705

706
/**
707
 * xmlParse3986PathNoScheme:
708
 * @uri:  pointer to an URI structure
709
 * @str:  the string to analyze
710
 *
711
 * Parse an path which is not a scheme and fills in the appropriate fields
712
 * of the @uri structure
713
 *
714
 * path-noscheme = segment-nz-nc *( "/" segment )
715
 *
716
 * Returns 0 or the error code
717
 */
718
static int
719
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
720
{
721
    const char *cur;
722
    int ret;
723

724
    cur = *str;
725

726
    ret = xmlParse3986Segment(&cur, ':', 0);
727
    if (ret != 0) return(ret);
728
    while (*cur == '/') {
729
        cur++;
730
	ret = xmlParse3986Segment(&cur, 0, 1);
731
	if (ret != 0) return(ret);
732
    }
733
    if (uri != NULL) {
734
	if (uri->path != NULL) xmlFree(uri->path);
735
        if (cur != *str) {
736
            if (uri->cleanup & 2)
737
                uri->path = STRNDUP(*str, cur - *str);
738
            else
739
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
740
        } else {
741
            uri->path = NULL;
742
        }
743
    }
744
    *str = cur;
745
    return (0);
746
}
747

748
/**
749
 * xmlParse3986HierPart:
750
 * @uri:  pointer to an URI structure
751
 * @str:  the string to analyze
752
 *
753
 * Parse an hierarchical part and fills in the appropriate fields
754
 * of the @uri structure
755
 *
756
 * hier-part     = "//" authority path-abempty
757
 *                / path-absolute
758
 *                / path-rootless
759
 *                / path-empty
760
 *
761
 * Returns 0 or the error code
762
 */
763
static int
764
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
765
{
766
    const char *cur;
767
    int ret;
768

769
    cur = *str;
770

771
    if ((*cur == '/') && (*(cur + 1) == '/')) {
772
        cur += 2;
773
	ret = xmlParse3986Authority(uri, &cur);
774
	if (ret != 0) return(ret);
775
        /*
776
         * An empty server is marked with a special URI value.
777
         */
778
	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
779
	    uri->port = PORT_EMPTY_SERVER;
780
	ret = xmlParse3986PathAbEmpty(uri, &cur);
781
	if (ret != 0) return(ret);
782
	*str = cur;
783
	return(0);
784
    } else if (*cur == '/') {
785
        ret = xmlParse3986PathAbsolute(uri, &cur);
786
	if (ret != 0) return(ret);
787
    } else if (ISA_PCHAR(cur)) {
788
        ret = xmlParse3986PathRootless(uri, &cur);
789
	if (ret != 0) return(ret);
790
    } else {
791
	/* path-empty is effectively empty */
792
	if (uri != NULL) {
793
	    if (uri->path != NULL) xmlFree(uri->path);
794
	    uri->path = NULL;
795
	}
796
    }
797
    *str = cur;
798
    return (0);
799
}
800

801
/**
802
 * xmlParse3986RelativeRef:
803
 * @uri:  pointer to an URI structure
804
 * @str:  the string to analyze
805
 *
806
 * Parse an URI string and fills in the appropriate fields
807
 * of the @uri structure
808
 *
809
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
810
 * relative-part = "//" authority path-abempty
811
 *               / path-absolute
812
 *               / path-noscheme
813
 *               / path-empty
814
 *
815
 * Returns 0 or the error code
816
 */
817
static int
818
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
819
    int ret;
820

821
    if ((*str == '/') && (*(str + 1) == '/')) {
822
        str += 2;
823
	ret = xmlParse3986Authority(uri, &str);
824
	if (ret != 0) return(ret);
825
	ret = xmlParse3986PathAbEmpty(uri, &str);
826
	if (ret != 0) return(ret);
827
    } else if (*str == '/') {
828
	ret = xmlParse3986PathAbsolute(uri, &str);
829
	if (ret != 0) return(ret);
830
    } else if (ISA_PCHAR(str)) {
831
        ret = xmlParse3986PathNoScheme(uri, &str);
832
	if (ret != 0) return(ret);
833
    } else {
834
	/* path-empty is effectively empty */
835
	if (uri != NULL) {
836
	    if (uri->path != NULL) xmlFree(uri->path);
837
	    uri->path = NULL;
838
	}
839
    }
840

841
    if (*str == '?') {
842
	str++;
843
	ret = xmlParse3986Query(uri, &str);
844
	if (ret != 0) return(ret);
845
    }
846
    if (*str == '#') {
847
	str++;
848
	ret = xmlParse3986Fragment(uri, &str);
849
	if (ret != 0) return(ret);
850
    }
851
    if (*str != 0) {
852
	xmlCleanURI(uri);
853
	return(1);
854
    }
855
    return(0);
856
}
857

858

859
/**
860
 * xmlParse3986URI:
861
 * @uri:  pointer to an URI structure
862
 * @str:  the string to analyze
863
 *
864
 * Parse an URI string and fills in the appropriate fields
865
 * of the @uri structure
866
 *
867
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
868
 *
869
 * Returns 0 or the error code
870
 */
871
static int
872
xmlParse3986URI(xmlURIPtr uri, const char *str) {
873
    int ret;
874

875
    ret = xmlParse3986Scheme(uri, &str);
876
    if (ret != 0) return(ret);
877
    if (*str != ':') {
878
	return(1);
879
    }
880
    str++;
881
    ret = xmlParse3986HierPart(uri, &str);
882
    if (ret != 0) return(ret);
883
    if (*str == '?') {
884
	str++;
885
	ret = xmlParse3986Query(uri, &str);
886
	if (ret != 0) return(ret);
887
    }
888
    if (*str == '#') {
889
	str++;
890
	ret = xmlParse3986Fragment(uri, &str);
891
	if (ret != 0) return(ret);
892
    }
893
    if (*str != 0) {
894
	xmlCleanURI(uri);
895
	return(1);
896
    }
897
    return(0);
898
}
899

900
/**
901
 * xmlParse3986URIReference:
902
 * @uri:  pointer to an URI structure
903
 * @str:  the string to analyze
904
 *
905
 * Parse an URI reference string and fills in the appropriate fields
906
 * of the @uri structure
907
 *
908
 * URI-reference = URI / relative-ref
909
 *
910
 * Returns 0 or the error code
911
 */
912
static int
913
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
914
    int ret;
915

916
    if (str == NULL)
917
	return(-1);
918
    xmlCleanURI(uri);
919

920
    /*
921
     * Try first to parse absolute refs, then fallback to relative if
922
     * it fails.
923
     */
924
    ret = xmlParse3986URI(uri, str);
925
    if (ret != 0) {
926
	xmlCleanURI(uri);
927
        ret = xmlParse3986RelativeRef(uri, str);
928
	if (ret != 0) {
929
	    xmlCleanURI(uri);
930
	    return(ret);
931
	}
932
    }
933
    return(0);
934
}
935

936
/**
937
 * xmlParseURI:
938
 * @str:  the URI string to analyze
939
 *
940
 * Parse an URI based on RFC 3986
941
 *
942
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
943
 *
944
 * Returns a newly built xmlURIPtr or NULL in case of error
945
 */
946
xmlURIPtr
947
xmlParseURI(const char *str) {
948
    xmlURIPtr uri;
949
    int ret;
950

951
    if (str == NULL)
952
	return(NULL);
953
    uri = xmlCreateURI();
954
    if (uri != NULL) {
955
	ret = xmlParse3986URIReference(uri, str);
956
        if (ret) {
957
	    xmlFreeURI(uri);
958
	    return(NULL);
959
	}
960
    }
961
    return(uri);
962
}
963

964
/**
965
 * xmlParseURIReference:
966
 * @uri:  pointer to an URI structure
967
 * @str:  the string to analyze
968
 *
969
 * Parse an URI reference string based on RFC 3986 and fills in the
970
 * appropriate fields of the @uri structure
971
 *
972
 * URI-reference = URI / relative-ref
973
 *
974
 * Returns 0 or the error code
975
 */
976
int
977
xmlParseURIReference(xmlURIPtr uri, const char *str) {
978
    return(xmlParse3986URIReference(uri, str));
979
}
980

981
/**
982
 * xmlParseURIRaw:
983
 * @str:  the URI string to analyze
984
 * @raw:  if 1 unescaping of URI pieces are disabled
985
 *
986
 * Parse an URI but allows to keep intact the original fragments.
987
 *
988
 * URI-reference = URI / relative-ref
989
 *
990
 * Returns a newly built xmlURIPtr or NULL in case of error
991
 */
992
xmlURIPtr
993
xmlParseURIRaw(const char *str, int raw) {
994
    xmlURIPtr uri;
995
    int ret;
996

997
    if (str == NULL)
998
	return(NULL);
999
    uri = xmlCreateURI();
1000
    if (uri != NULL) {
1001
        if (raw) {
1002
	    uri->cleanup |= 2;
1003
	}
1004
	ret = xmlParseURIReference(uri, str);
1005
        if (ret) {
1006
	    xmlFreeURI(uri);
1007
	    return(NULL);
1008
	}
1009
    }
1010
    return(uri);
1011
}
1012

1013
/************************************************************************
1014
 *									*
1015
 *			Generic URI structure functions			*
1016
 *									*
1017
 ************************************************************************/
1018

1019
/**
1020
 * xmlCreateURI:
1021
 *
1022
 * Simply creates an empty xmlURI
1023
 *
1024
 * Returns the new structure or NULL in case of error
1025
 */
1026
xmlURIPtr
1027
xmlCreateURI(void) {
1028
    xmlURIPtr ret;
1029

1030
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1031
    if (ret == NULL) {
1032
        xmlURIErrMemory("creating URI structure\n");
1033
	return(NULL);
1034
    }
1035
    memset(ret, 0, sizeof(xmlURI));
1036
    ret->port = PORT_EMPTY;
1037
    return(ret);
1038
}
1039

1040
/**
1041
 * xmlSaveUriRealloc:
1042
 *
1043
 * Function to handle properly a reallocation when saving an URI
1044
 * Also imposes some limit on the length of an URI string output
1045
 */
1046
static xmlChar *
1047
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1048
    xmlChar *temp;
1049
    int tmp;
1050

1051
    if (*max > MAX_URI_LENGTH) {
1052
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1053
        return(NULL);
1054
    }
1055
    tmp = *max * 2;
1056
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1057
    if (temp == NULL) {
1058
        xmlURIErrMemory("saving URI\n");
1059
        return(NULL);
1060
    }
1061
    *max = tmp;
1062
    return(temp);
1063
}
1064

1065
/**
1066
 * xmlSaveUri:
1067
 * @uri:  pointer to an xmlURI
1068
 *
1069
 * Save the URI as an escaped string
1070
 *
1071
 * Returns a new string (to be deallocated by caller)
1072
 */
1073
xmlChar *
1074
xmlSaveUri(xmlURIPtr uri) {
1075
    xmlChar *ret = NULL;
1076
    xmlChar *temp;
1077
    const char *p;
1078
    int len;
1079
    int max;
1080

1081
    if (uri == NULL) return(NULL);
1082

1083

1084
    max = 80;
1085
    ret = (xmlChar *) xmlMallocAtomic(max + 1);
1086
    if (ret == NULL) {
1087
        xmlURIErrMemory("saving URI\n");
1088
	return(NULL);
1089
    }
1090
    len = 0;
1091

1092
    if (uri->scheme != NULL) {
1093
	p = uri->scheme;
1094
	while (*p != 0) {
1095
	    if (len >= max) {
1096
                temp = xmlSaveUriRealloc(ret, &max);
1097
                if (temp == NULL) goto mem_error;
1098
		ret = temp;
1099
	    }
1100
	    ret[len++] = *p++;
1101
	}
1102
	if (len >= max) {
1103
            temp = xmlSaveUriRealloc(ret, &max);
1104
            if (temp == NULL) goto mem_error;
1105
            ret = temp;
1106
	}
1107
	ret[len++] = ':';
1108
    }
1109
    if (uri->opaque != NULL) {
1110
	p = uri->opaque;
1111
	while (*p != 0) {
1112
	    if (len + 3 >= max) {
1113
                temp = xmlSaveUriRealloc(ret, &max);
1114
                if (temp == NULL) goto mem_error;
1115
                ret = temp;
1116
	    }
1117
	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1118
		ret[len++] = *p++;
1119
	    else {
1120
		int val = *(unsigned char *)p++;
1121
		int hi = val / 0x10, lo = val % 0x10;
1122
		ret[len++] = '%';
1123
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1124
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1125
	    }
1126
	}
1127
    } else {
1128
	if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1129
	    if (len + 3 >= max) {
1130
                temp = xmlSaveUriRealloc(ret, &max);
1131
                if (temp == NULL) goto mem_error;
1132
                ret = temp;
1133
	    }
1134
	    ret[len++] = '/';
1135
	    ret[len++] = '/';
1136
	    if (uri->user != NULL) {
1137
		p = uri->user;
1138
		while (*p != 0) {
1139
		    if (len + 3 >= max) {
1140
                        temp = xmlSaveUriRealloc(ret, &max);
1141
                        if (temp == NULL) goto mem_error;
1142
                        ret = temp;
1143
		    }
1144
		    if ((IS_UNRESERVED(*(p))) ||
1145
			((*(p) == ';')) || ((*(p) == ':')) ||
1146
			((*(p) == '&')) || ((*(p) == '=')) ||
1147
			((*(p) == '+')) || ((*(p) == '$')) ||
1148
			((*(p) == ',')))
1149
			ret[len++] = *p++;
1150
		    else {
1151
			int val = *(unsigned char *)p++;
1152
			int hi = val / 0x10, lo = val % 0x10;
1153
			ret[len++] = '%';
1154
			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155
			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156
		    }
1157
		}
1158
		if (len + 3 >= max) {
1159
                    temp = xmlSaveUriRealloc(ret, &max);
1160
                    if (temp == NULL) goto mem_error;
1161
                    ret = temp;
1162
		}
1163
		ret[len++] = '@';
1164
	    }
1165
	    if (uri->server != NULL) {
1166
		p = uri->server;
1167
		while (*p != 0) {
1168
		    if (len >= max) {
1169
			temp = xmlSaveUriRealloc(ret, &max);
1170
			if (temp == NULL) goto mem_error;
1171
			ret = temp;
1172
		    }
1173
                    /* TODO: escaping? */
1174
		    ret[len++] = (xmlChar) *p++;
1175
		}
1176
	    }
1177
            if (uri->port > 0) {
1178
                if (len + 10 >= max) {
1179
                    temp = xmlSaveUriRealloc(ret, &max);
1180
                    if (temp == NULL) goto mem_error;
1181
                    ret = temp;
1182
                }
1183
                len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1184
            }
1185
	} else if (uri->authority != NULL) {
1186
	    if (len + 3 >= max) {
1187
                temp = xmlSaveUriRealloc(ret, &max);
1188
                if (temp == NULL) goto mem_error;
1189
                ret = temp;
1190
	    }
1191
	    ret[len++] = '/';
1192
	    ret[len++] = '/';
1193
	    p = uri->authority;
1194
	    while (*p != 0) {
1195
		if (len + 3 >= max) {
1196
                    temp = xmlSaveUriRealloc(ret, &max);
1197
                    if (temp == NULL) goto mem_error;
1198
                    ret = temp;
1199
		}
1200
		if ((IS_UNRESERVED(*(p))) ||
1201
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1202
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1203
                    ((*(p) == '=')) || ((*(p) == '+')))
1204
		    ret[len++] = *p++;
1205
		else {
1206
		    int val = *(unsigned char *)p++;
1207
		    int hi = val / 0x10, lo = val % 0x10;
1208
		    ret[len++] = '%';
1209
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1210
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1211
		}
1212
	    }
1213
	} else if (uri->scheme != NULL) {
1214
	    if (len + 3 >= max) {
1215
                temp = xmlSaveUriRealloc(ret, &max);
1216
                if (temp == NULL) goto mem_error;
1217
                ret = temp;
1218
	    }
1219
	}
1220
	if (uri->path != NULL) {
1221
	    p = uri->path;
1222
	    /*
1223
	     * the colon in file:///d: should not be escaped or
1224
	     * Windows accesses fail later.
1225
	     */
1226
	    if ((uri->scheme != NULL) &&
1227
		(p[0] == '/') &&
1228
		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1229
		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1230
		(p[2] == ':') &&
1231
	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1232
		if (len + 3 >= max) {
1233
                    temp = xmlSaveUriRealloc(ret, &max);
1234
                    if (temp == NULL) goto mem_error;
1235
                    ret = temp;
1236
		}
1237
		ret[len++] = *p++;
1238
		ret[len++] = *p++;
1239
		ret[len++] = *p++;
1240
	    }
1241
	    while (*p != 0) {
1242
		if (len + 3 >= max) {
1243
                    temp = xmlSaveUriRealloc(ret, &max);
1244
                    if (temp == NULL) goto mem_error;
1245
                    ret = temp;
1246
		}
1247
		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1248
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249
	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1250
	            ((*(p) == ',')))
1251
		    ret[len++] = *p++;
1252
		else {
1253
		    int val = *(unsigned char *)p++;
1254
		    int hi = val / 0x10, lo = val % 0x10;
1255
		    ret[len++] = '%';
1256
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1257
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1258
		}
1259
	    }
1260
	}
1261
	if (uri->query_raw != NULL) {
1262
	    if (len + 1 >= max) {
1263
                temp = xmlSaveUriRealloc(ret, &max);
1264
                if (temp == NULL) goto mem_error;
1265
                ret = temp;
1266
	    }
1267
	    ret[len++] = '?';
1268
	    p = uri->query_raw;
1269
	    while (*p != 0) {
1270
		if (len + 1 >= max) {
1271
                    temp = xmlSaveUriRealloc(ret, &max);
1272
                    if (temp == NULL) goto mem_error;
1273
                    ret = temp;
1274
		}
1275
		ret[len++] = *p++;
1276
	    }
1277
	} else if (uri->query != NULL) {
1278
	    if (len + 3 >= max) {
1279
                temp = xmlSaveUriRealloc(ret, &max);
1280
                if (temp == NULL) goto mem_error;
1281
                ret = temp;
1282
	    }
1283
	    ret[len++] = '?';
1284
	    p = uri->query;
1285
	    while (*p != 0) {
1286
		if (len + 3 >= max) {
1287
                    temp = xmlSaveUriRealloc(ret, &max);
1288
                    if (temp == NULL) goto mem_error;
1289
                    ret = temp;
1290
		}
1291
		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1292
		    ret[len++] = *p++;
1293
		else {
1294
		    int val = *(unsigned char *)p++;
1295
		    int hi = val / 0x10, lo = val % 0x10;
1296
		    ret[len++] = '%';
1297
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1298
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1299
		}
1300
	    }
1301
	}
1302
    }
1303
    if (uri->fragment != NULL) {
1304
	if (len + 3 >= max) {
1305
            temp = xmlSaveUriRealloc(ret, &max);
1306
            if (temp == NULL) goto mem_error;
1307
            ret = temp;
1308
	}
1309
	ret[len++] = '#';
1310
	p = uri->fragment;
1311
	while (*p != 0) {
1312
	    if (len + 3 >= max) {
1313
                temp = xmlSaveUriRealloc(ret, &max);
1314
                if (temp == NULL) goto mem_error;
1315
                ret = temp;
1316
	    }
1317
	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1318
		ret[len++] = *p++;
1319
	    else {
1320
		int val = *(unsigned char *)p++;
1321
		int hi = val / 0x10, lo = val % 0x10;
1322
		ret[len++] = '%';
1323
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1324
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1325
	    }
1326
	}
1327
    }
1328
    if (len >= max) {
1329
        temp = xmlSaveUriRealloc(ret, &max);
1330
        if (temp == NULL) goto mem_error;
1331
        ret = temp;
1332
    }
1333
    ret[len] = 0;
1334
    return(ret);
1335

1336
mem_error:
1337
    xmlFree(ret);
1338
    return(NULL);
1339
}
1340

1341
/**
1342
 * xmlPrintURI:
1343
 * @stream:  a FILE* for the output
1344
 * @uri:  pointer to an xmlURI
1345
 *
1346
 * Prints the URI in the stream @stream.
1347
 */
1348
void
1349
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1350
    xmlChar *out;
1351

1352
    out = xmlSaveUri(uri);
1353
    if (out != NULL) {
1354
	fprintf(stream, "%s", (char *) out);
1355
	xmlFree(out);
1356
    }
1357
}
1358

1359
/**
1360
 * xmlCleanURI:
1361
 * @uri:  pointer to an xmlURI
1362
 *
1363
 * Make sure the xmlURI struct is free of content
1364
 */
1365
static void
1366
xmlCleanURI(xmlURIPtr uri) {
1367
    if (uri == NULL) return;
1368

1369
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1370
    uri->scheme = NULL;
1371
    if (uri->server != NULL) xmlFree(uri->server);
1372
    uri->server = NULL;
1373
    if (uri->user != NULL) xmlFree(uri->user);
1374
    uri->user = NULL;
1375
    if (uri->path != NULL) xmlFree(uri->path);
1376
    uri->path = NULL;
1377
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1378
    uri->fragment = NULL;
1379
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1380
    uri->opaque = NULL;
1381
    if (uri->authority != NULL) xmlFree(uri->authority);
1382
    uri->authority = NULL;
1383
    if (uri->query != NULL) xmlFree(uri->query);
1384
    uri->query = NULL;
1385
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1386
    uri->query_raw = NULL;
1387
}
1388

1389
/**
1390
 * xmlFreeURI:
1391
 * @uri:  pointer to an xmlURI
1392
 *
1393
 * Free up the xmlURI struct
1394
 */
1395
void
1396
xmlFreeURI(xmlURIPtr uri) {
1397
    if (uri == NULL) return;
1398

1399
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1400
    if (uri->server != NULL) xmlFree(uri->server);
1401
    if (uri->user != NULL) xmlFree(uri->user);
1402
    if (uri->path != NULL) xmlFree(uri->path);
1403
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1404
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1405
    if (uri->authority != NULL) xmlFree(uri->authority);
1406
    if (uri->query != NULL) xmlFree(uri->query);
1407
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1408
    xmlFree(uri);
1409
}
1410

1411
/************************************************************************
1412
 *									*
1413
 *			Helper functions				*
1414
 *									*
1415
 ************************************************************************/
1416

1417
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps of RFC 2396, Section 5.2
 * (steps 6.c through 6.g) to a path string: "." segments and repeated
 * slashes are dropped, "<segment>/.." pairs are collapsed and leading
 * "/.." segments of an absolute path are discarded.
 *
 * The string is rewritten in place; it can only shrink and no memory
 * is allocated.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /*
     * Leading slashes are kept untouched: move to the first character
     * of the first non-empty segment.
     */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /* Everything before this point is kept as is. */
    wr = rd;

    /*
     * First pass, steps (c) and (d): walk the segments one by one,
     * copying down the ones which are kept.
     */
    while (*rd != '\0') {
        if (rd[0] == '.') {
            /*
             * c) "./" where "." is a complete segment is removed,
             *    together with any extra '/' following it.
             */
            if (rd[1] == '/') {
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }
            /* d) a trailing "." complete segment is removed. */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is copied up to its separator. */
        while (*rd != '/') {
            if (*rd == '\0')
                goto first_pass_done;
            *wr++ = *rd++;
        }
        /* Collapse a run of '/' into the last one ... */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;
        /* ... and copy that single separator. */
        *wr++ = *rd++;
    }
first_pass_done:
    *wr = '\0';

    /* Second pass: restart from the first non-empty segment. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Steps (e) and (f):
     *
     * e) every "<segment>/../", where <segment> is a complete segment
     *    different from "..", is removed, leftmost match first, until
     *    none is left.
     *
     * f) a trailing "<segment>/..", with the same restriction on
     *    <segment>, is removed.
     *
     * The buffer is compacted each time a pair is removed, so a single
     * position pointer is enough here.
     */
    for (;;) {
        char *next, *dst;
        int curIsParent, nextIsParent;

        /* "rd" is the first character of the segment under test. */

        /* Locate the end of that segment. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment has no follower, nothing left to collapse. */
        if (*next == '\0')
            break;

        /* "next" becomes the start of the following segment. */
        next++;

        curIsParent = (rd[0] == '.') && (rd[1] == '.') && (next == rd + 3);
        nextIsParent = (next[0] == '.') && (next[1] == '.') &&
                       ((next[2] == '/') || (next[2] == '\0'));

        /*
         * Keep the current segment if it is ".." itself or if it is not
         * followed by "..", and go on with the next one.
         */
        if (curIsParent || !nextIsParent) {
            rd = next;
            continue;
        }

        /* f) "<segment>/.." ends the buffer: cut it and stop. */
        if (next[2] == '\0') {
            rd[0] = '\0';
            break;
        }

        /*
         * e) drop "<segment>/../" by moving the tail down. Source and
         * destination overlap, hence the byte by byte copy (terminator
         * included) rather than strcpy().
         */
        dst = rd;
        next += 3;
        do {
            *dst = *next++;
        } while (*dst++ != '\0');

        /*
         * Step back over the separator(s) preceding "rd"; when that
         * reaches the start of the buffer, just go on from "rd".
         */
        for (next = rd; next > path; ) {
            next--;
            if (*next != '/')
                break;
        }
        if (next == path)
            continue;

        /*
         * "next" is on the last character of the previous segment:
         * rewind to its first character and examine it again. This is
         * what lets "foo/bar/../.." lose "foo/.." once "bar/.." is gone.
         */
        rd = next;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }
    wr[0] = '\0';

    /*
     * g) If the result still begins with one or more complete ".."
     *    segments, the reference is in error. RFC 2396 allows keeping
     *    them, removing them, or refusing the reference.
     *
     * Here they are removed from an absolute path.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
               ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
1605

1606
/* Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1613

1614
/**
 * xmlURIHexValue:
 * @c:  a character
 *
 * Returns the numeric value of the hexadecimal digit @c, or 0 if @c is
 *         not an hexadecimal digit
 */
static int
xmlURIHexValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    if ((c >= 'A') && (c <= 'F'))
        return((c - 'A') + 10);
    return(0);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' which is not followed by two hexadecimal digits within the
 * remaining length is copied unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so when @target is provided it needs room for the
 * processed length plus the terminating zero.
 *
 * Returns @target if it was provided, otherwise a newly allocated copy of
 * the string, but unescaped. Returns NULL only in case of error: @str is
 * NULL, the string is too long for an int length, or the allocation failed.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /*
         * Reject lengths which do not fit an int instead of letting the
         * narrowing conversion wrap to an arbitrary value.
         */
        if (full >= INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        /* size computed as size_t: len + 1 would overflow for INT_MAX */
        ret = (char *) xmlMallocAtomic((size_t) len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = xmlURIHexValue(in[1]) * 16 + xmlURIHexValue(in[2]);

            in += 3;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1677

1678
/**
1679
 * xmlURIEscapeStr:
1680
 * @str:  string to escape
1681
 * @list: exception list string of chars not to escape
1682
 *
1683
 * This routine escapes a string to hex, ignoring reserved characters
1684
 * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1685
 *
1686
 * Returns a new escaped string or NULL in case of error.
1687
 */
1688
xmlChar *
1689
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1690
    xmlChar *ret, ch;
1691
    xmlChar *temp;
1692
    const xmlChar *in;
1693
    int len, out;
1694

1695
    if (str == NULL)
1696
	return(NULL);
1697
    if (str[0] == 0)
1698
	return(xmlStrdup(str));
1699
    len = xmlStrlen(str);
1700
    if (!(len > 0)) return(NULL);
1701

1702
    len += 20;
1703
    ret = (xmlChar *) xmlMallocAtomic(len);
1704
    if (ret == NULL) {
1705
        xmlURIErrMemory("escaping URI value\n");
1706
	return(NULL);
1707
    }
1708
    in = (const xmlChar *) str;
1709
    out = 0;
1710
    while(*in != 0) {
1711
	if (len - out <= 3) {
1712
            temp = xmlSaveUriRealloc(ret, &len);
1713
	    if (temp == NULL) {
1714
                xmlURIErrMemory("escaping URI value\n");
1715
		xmlFree(ret);
1716
		return(NULL);
1717
	    }
1718
	    ret = temp;
1719
	}
1720

1721
	ch = *in;
1722

1723
	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1724
	    unsigned char val;
1725
	    ret[out++] = '%';
1726
	    val = ch >> 4;
1727
	    if (val <= 9)
1728
		ret[out++] = '0' + val;
1729
	    else
1730
		ret[out++] = 'A' + val - 0xA;
1731
	    val = ch & 0xF;
1732
	    if (val <= 9)
1733
		ret[out++] = '0' + val;
1734
	    else
1735
		ret[out++] = 'A' + val - 0xA;
1736
	    in++;
1737
	} else {
1738
	    ret[out++] = *in++;
1739
	}
1740

1741
    }
1742
    ret[out] = 0;
1743
    return(ret);
1744
}
1745

1746
/**
1747
 * xmlURIEscape:
1748
 * @str:  the string of the URI to escape
1749
 *
1750
 * Escaping routine, does not do validity checks !
1751
 * It will try to escape the chars needing this, but this is heuristic
1752
 * based it's impossible to be sure.
1753
 *
1754
 * Returns an copy of the string, but escaped
1755
 *
1756
 * 25 May 2001
1757
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1758
 * according to RFC2396.
1759
 *   - Carl Douglas
1760
 */
1761
xmlChar *
1762
xmlURIEscape(const xmlChar * str)
1763
{
1764
    xmlChar *ret, *segment = NULL;
1765
    xmlURIPtr uri;
1766
    int ret2;
1767

1768
    if (str == NULL)
1769
        return (NULL);
1770

1771
    uri = xmlCreateURI();
1772
    if (uri != NULL) {
1773
	/*
1774
	 * Allow escaping errors in the unescaped form
1775
	 */
1776
        uri->cleanup = 1;
1777
        ret2 = xmlParseURIReference(uri, (const char *)str);
1778
        if (ret2) {
1779
            xmlFreeURI(uri);
1780
            return (NULL);
1781
        }
1782
    }
1783

1784
    if (!uri)
1785
        return NULL;
1786

1787
    ret = NULL;
1788

1789
#define NULLCHK(p) if(!p) { \
1790
         xmlURIErrMemory("escaping URI value\n"); \
1791
         xmlFreeURI(uri); \
1792
         xmlFree(ret); \
1793
         return NULL; } \
1794

1795
    if (uri->scheme) {
1796
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1797
        NULLCHK(segment)
1798
        ret = xmlStrcat(ret, segment);
1799
        ret = xmlStrcat(ret, BAD_CAST ":");
1800
        xmlFree(segment);
1801
    }
1802

1803
    if (uri->authority) {
1804
        segment =
1805
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1806
        NULLCHK(segment)
1807
        ret = xmlStrcat(ret, BAD_CAST "//");
1808
        ret = xmlStrcat(ret, segment);
1809
        xmlFree(segment);
1810
    }
1811

1812
    if (uri->user) {
1813
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1814
        NULLCHK(segment)
1815
        ret = xmlStrcat(ret,BAD_CAST "//");
1816
        ret = xmlStrcat(ret, segment);
1817
        ret = xmlStrcat(ret, BAD_CAST "@");
1818
        xmlFree(segment);
1819
    }
1820

1821
    if (uri->server) {
1822
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1823
        NULLCHK(segment)
1824
        if (uri->user == NULL)
1825
            ret = xmlStrcat(ret, BAD_CAST "//");
1826
        ret = xmlStrcat(ret, segment);
1827
        xmlFree(segment);
1828
    }
1829

1830
    if (uri->port > 0) {
1831
        xmlChar port[11];
1832

1833
        snprintf((char *) port, 11, "%d", uri->port);
1834
        ret = xmlStrcat(ret, BAD_CAST ":");
1835
        ret = xmlStrcat(ret, port);
1836
    }
1837

1838
    if (uri->path) {
1839
        segment =
1840
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1841
        NULLCHK(segment)
1842
        ret = xmlStrcat(ret, segment);
1843
        xmlFree(segment);
1844
    }
1845

1846
    if (uri->query_raw) {
1847
        ret = xmlStrcat(ret, BAD_CAST "?");
1848
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1849
    }
1850
    else if (uri->query) {
1851
        segment =
1852
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1853
        NULLCHK(segment)
1854
        ret = xmlStrcat(ret, BAD_CAST "?");
1855
        ret = xmlStrcat(ret, segment);
1856
        xmlFree(segment);
1857
    }
1858

1859
    if (uri->opaque) {
1860
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1861
        NULLCHK(segment)
1862
        ret = xmlStrcat(ret, segment);
1863
        xmlFree(segment);
1864
    }
1865

1866
    if (uri->fragment) {
1867
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1868
        NULLCHK(segment)
1869
        ret = xmlStrcat(ret, BAD_CAST "#");
1870
        ret = xmlStrcat(ret, segment);
1871
        xmlFree(segment);
1872
    }
1873

1874
    xmlFreeURI(uri);
1875
#undef NULLCHK
1876

1877
    return (ret);
1878
}
1879

1880
/************************************************************************
1881
 *									*
1882
 *			Public functions				*
1883
 *									*
1884
 ************************************************************************/
1885

1886
/**
1887
 * xmlBuildURI:
1888
 * @URI:  the URI instance found in the document
1889
 * @base:  the base value
1890
 *
1891
 * Computes he final URI of the reference done by checking that
1892
 * the given URI is valid, and building the final URI using the
1893
 * base URI. This is processed according to section 5.2 of the
1894
 * RFC 2396
1895
 *
1896
 * 5.2. Resolving Relative References to Absolute Form
1897
 *
1898
 * Returns a new URI string (to be freed by the caller) or NULL in case
1899
 *         of error.
1900
 */
1901
xmlChar *
1902
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1903
    xmlChar *val = NULL;
1904
    int ret, len, indx, cur, out;
1905
    xmlURIPtr ref = NULL;
1906
    xmlURIPtr bas = NULL;
1907
    xmlURIPtr res = NULL;
1908

1909
    /*
1910
     * 1) The URI reference is parsed into the potential four components and
1911
     *    fragment identifier, as described in Section 4.3.
1912
     *
1913
     *    NOTE that a completely empty URI is treated by modern browsers
1914
     *    as a reference to "." rather than as a synonym for the current
1915
     *    URI.  Should we do that here?
1916
     */
1917
    if (URI == NULL)
1918
	ret = -1;
1919
    else {
1920
	if (*URI) {
1921
	    ref = xmlCreateURI();
1922
	    if (ref == NULL)
1923
		goto done;
1924
	    ret = xmlParseURIReference(ref, (const char *) URI);
1925
	}
1926
	else
1927
	    ret = 0;
1928
    }
1929
    if (ret != 0)
1930
	goto done;
1931
    if ((ref != NULL) && (ref->scheme != NULL)) {
1932
	/*
1933
	 * The URI is absolute don't modify.
1934
	 */
1935
	val = xmlStrdup(URI);
1936
	goto done;
1937
    }
1938
    if (base == NULL)
1939
	ret = -1;
1940
    else {
1941
	bas = xmlCreateURI();
1942
	if (bas == NULL)
1943
	    goto done;
1944
	ret = xmlParseURIReference(bas, (const char *) base);
1945
    }
1946
    if (ret != 0) {
1947
	if (ref)
1948
	    val = xmlSaveUri(ref);
1949
	goto done;
1950
    }
1951
    if (ref == NULL) {
1952
	/*
1953
	 * the base fragment must be ignored
1954
	 */
1955
	if (bas->fragment != NULL) {
1956
	    xmlFree(bas->fragment);
1957
	    bas->fragment = NULL;
1958
	}
1959
	val = xmlSaveUri(bas);
1960
	goto done;
1961
    }
1962

1963
    /*
1964
     * 2) If the path component is empty and the scheme, authority, and
1965
     *    query components are undefined, then it is a reference to the
1966
     *    current document and we are done.  Otherwise, the reference URI's
1967
     *    query and fragment components are defined as found (or not found)
1968
     *    within the URI reference and not inherited from the base URI.
1969
     *
1970
     *    NOTE that in modern browsers, the parsing differs from the above
1971
     *    in the following aspect:  the query component is allowed to be
1972
     *    defined while still treating this as a reference to the current
1973
     *    document.
1974
     */
1975
    res = xmlCreateURI();
1976
    if (res == NULL)
1977
	goto done;
1978
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1979
	((ref->authority == NULL) && (ref->server == NULL) &&
1980
         (ref->port == PORT_EMPTY))) {
1981
	if (bas->scheme != NULL)
1982
	    res->scheme = xmlMemStrdup(bas->scheme);
1983
	if (bas->authority != NULL)
1984
	    res->authority = xmlMemStrdup(bas->authority);
1985
	else {
1986
	    if (bas->server != NULL)
1987
		res->server = xmlMemStrdup(bas->server);
1988
	    if (bas->user != NULL)
1989
		res->user = xmlMemStrdup(bas->user);
1990
	    res->port = bas->port;
1991
	}
1992
	if (bas->path != NULL)
1993
	    res->path = xmlMemStrdup(bas->path);
1994
	if (ref->query_raw != NULL)
1995
	    res->query_raw = xmlMemStrdup (ref->query_raw);
1996
	else if (ref->query != NULL)
1997
	    res->query = xmlMemStrdup(ref->query);
1998
	else if (bas->query_raw != NULL)
1999
	    res->query_raw = xmlMemStrdup(bas->query_raw);
2000
	else if (bas->query != NULL)
2001
	    res->query = xmlMemStrdup(bas->query);
2002
	if (ref->fragment != NULL)
2003
	    res->fragment = xmlMemStrdup(ref->fragment);
2004
	goto step_7;
2005
    }
2006

2007
    /*
2008
     * 3) If the scheme component is defined, indicating that the reference
2009
     *    starts with a scheme name, then the reference is interpreted as an
2010
     *    absolute URI and we are done.  Otherwise, the reference URI's
2011
     *    scheme is inherited from the base URI's scheme component.
2012
     */
2013
    if (ref->scheme != NULL) {
2014
	val = xmlSaveUri(ref);
2015
	goto done;
2016
    }
2017
    if (bas->scheme != NULL)
2018
	res->scheme = xmlMemStrdup(bas->scheme);
2019

2020
    if (ref->query_raw != NULL)
2021
	res->query_raw = xmlMemStrdup(ref->query_raw);
2022
    else if (ref->query != NULL)
2023
	res->query = xmlMemStrdup(ref->query);
2024
    if (ref->fragment != NULL)
2025
	res->fragment = xmlMemStrdup(ref->fragment);
2026

2027
    /*
2028
     * 4) If the authority component is defined, then the reference is a
2029
     *    network-path and we skip to step 7.  Otherwise, the reference
2030
     *    URI's authority is inherited from the base URI's authority
2031
     *    component, which will also be undefined if the URI scheme does not
2032
     *    use an authority component.
2033
     */
2034
    if ((ref->authority != NULL) || (ref->server != NULL) ||
2035
         (ref->port != PORT_EMPTY)) {
2036
	if (ref->authority != NULL)
2037
	    res->authority = xmlMemStrdup(ref->authority);
2038
	else {
2039
            if (ref->server != NULL)
2040
                res->server = xmlMemStrdup(ref->server);
2041
	    if (ref->user != NULL)
2042
		res->user = xmlMemStrdup(ref->user);
2043
            res->port = ref->port;
2044
	}
2045
	if (ref->path != NULL)
2046
	    res->path = xmlMemStrdup(ref->path);
2047
	goto step_7;
2048
    }
2049
    if (bas->authority != NULL)
2050
	res->authority = xmlMemStrdup(bas->authority);
2051
    else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2052
	if (bas->server != NULL)
2053
	    res->server = xmlMemStrdup(bas->server);
2054
	if (bas->user != NULL)
2055
	    res->user = xmlMemStrdup(bas->user);
2056
	res->port = bas->port;
2057
    }
2058

2059
    /*
2060
     * 5) If the path component begins with a slash character ("/"), then
2061
     *    the reference is an absolute-path and we skip to step 7.
2062
     */
2063
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2064
	res->path = xmlMemStrdup(ref->path);
2065
	goto step_7;
2066
    }
2067

2068

2069
    /*
2070
     * 6) If this step is reached, then we are resolving a relative-path
2071
     *    reference.  The relative path needs to be merged with the base
2072
     *    URI's path.  Although there are many ways to do this, we will
2073
     *    describe a simple method using a separate string buffer.
2074
     *
2075
     * Allocate a buffer large enough for the result string.
2076
     */
2077
    len = 2; /* extra / and 0 */
2078
    if (ref->path != NULL)
2079
	len += strlen(ref->path);
2080
    if (bas->path != NULL)
2081
	len += strlen(bas->path);
2082
    res->path = (char *) xmlMallocAtomic(len);
2083
    if (res->path == NULL) {
2084
        xmlURIErrMemory("resolving URI against base\n");
2085
	goto done;
2086
    }
2087
    res->path[0] = 0;
2088

2089
    /*
2090
     * a) All but the last segment of the base URI's path component is
2091
     *    copied to the buffer.  In other words, any characters after the
2092
     *    last (right-most) slash character, if any, are excluded.
2093
     */
2094
    cur = 0;
2095
    out = 0;
2096
    if (bas->path != NULL) {
2097
	while (bas->path[cur] != 0) {
2098
	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2099
		cur++;
2100
	    if (bas->path[cur] == 0)
2101
		break;
2102

2103
	    cur++;
2104
	    while (out < cur) {
2105
		res->path[out] = bas->path[out];
2106
		out++;
2107
	    }
2108
	}
2109
    }
2110
    res->path[out] = 0;
2111

2112
    /*
2113
     * b) The reference's path component is appended to the buffer
2114
     *    string.
2115
     */
2116
    if (ref->path != NULL && ref->path[0] != 0) {
2117
	indx = 0;
2118
	/*
2119
	 * Ensure the path includes a '/'
2120
	 */
2121
	if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2122
	    res->path[out++] = '/';
2123
	while (ref->path[indx] != 0) {
2124
	    res->path[out++] = ref->path[indx++];
2125
	}
2126
    }
2127
    res->path[out] = 0;
2128

2129
    /*
2130
     * Steps c) to h) are really path normalization steps
2131
     */
2132
    xmlNormalizeURIPath(res->path);
2133

2134
step_7:
2135

2136
    /*
2137
     * 7) The resulting URI components, including any inherited from the
2138
     *    base URI, are recombined to give the absolute form of the URI
2139
     *    reference.
2140
     */
2141
    val = xmlSaveUri(res);
2142

2143
done:
2144
    if (ref != NULL)
2145
	xmlFreeURI(ref);
2146
    if (bas != NULL)
2147
	xmlFreeURI(bas);
2148
    if (res != NULL)
2149
	xmlFreeURI(res);
2150
    return(val);
2151
}
2152

2153
/**
2154
 * xmlBuildRelativeURI:
2155
 * @URI:  the URI reference under consideration
2156
 * @base:  the base value
2157
 *
2158
 * Expresses the URI of the reference in terms relative to the
2159
 * base.  Some examples of this operation include:
2160
 *     base = "http://site1.com/docs/book1.html"
2161
 *        URI input                        URI returned
2162
 *     docs/pic1.gif                    pic1.gif
2163
 *     docs/img/pic1.gif                img/pic1.gif
2164
 *     img/pic1.gif                     ../img/pic1.gif
2165
 *     http://site1.com/docs/pic1.gif   pic1.gif
2166
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2167
 *
2168
 *     base = "docs/book1.html"
2169
 *        URI input                        URI returned
2170
 *     docs/pic1.gif                    pic1.gif
2171
 *     docs/img/pic1.gif                img/pic1.gif
2172
 *     img/pic1.gif                     ../img/pic1.gif
2173
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2174
 *
2175
 *
2176
 * Note: if the URI reference is really weird or complicated, it may be
2177
 *       worthwhile to first convert it into a "nice" one by calling
2178
 *       xmlBuildURI (using 'base') before calling this routine,
2179
 *       since this routine (for reasonable efficiency) assumes URI has
2180
 *       already been through some validation.
2181
 *
2182
 * Returns a new URI string (to be freed by the caller) or NULL in case
2183
 * error.
2184
 */
2185
xmlChar *
2186
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2187
{
2188
    xmlChar *val = NULL;
2189
    int ret;
2190
    int ix;
2191
    int nbslash = 0;
2192
    int len;
2193
    xmlURIPtr ref = NULL;
2194
    xmlURIPtr bas = NULL;
2195
    xmlChar *bptr, *uptr, *vptr;
2196
    int remove_path = 0;
2197

2198
    if ((URI == NULL) || (*URI == 0))
2199
	return NULL;
2200

2201
    /*
2202
     * First parse URI into a standard form
2203
     */
2204
    ref = xmlCreateURI ();
2205
    if (ref == NULL)
2206
	return NULL;
2207
    /* If URI not already in "relative" form */
2208
    if (URI[0] != '.') {
2209
	ret = xmlParseURIReference (ref, (const char *) URI);
2210
	if (ret != 0)
2211
	    goto done;		/* Error in URI, return NULL */
2212
    } else
2213
	ref->path = (char *)xmlStrdup(URI);
2214

2215
    /*
2216
     * Next parse base into the same standard form
2217
     */
2218
    if ((base == NULL) || (*base == 0)) {
2219
	val = xmlStrdup (URI);
2220
	goto done;
2221
    }
2222
    bas = xmlCreateURI ();
2223
    if (bas == NULL)
2224
	goto done;
2225
    if (base[0] != '.') {
2226
	ret = xmlParseURIReference (bas, (const char *) base);
2227
	if (ret != 0)
2228
	    goto done;		/* Error in base, return NULL */
2229
    } else
2230
	bas->path = (char *)xmlStrdup(base);
2231

2232
    /*
2233
     * If the scheme / server on the URI differs from the base,
2234
     * just return the URI
2235
     */
2236
    if ((ref->scheme != NULL) &&
2237
	((bas->scheme == NULL) ||
2238
	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2239
	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2240
         (bas->port != ref->port))) {
2241
	val = xmlStrdup (URI);
2242
	goto done;
2243
    }
2244
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2245
	val = xmlStrdup(BAD_CAST "");
2246
	goto done;
2247
    }
2248
    if (bas->path == NULL) {
2249
	val = xmlStrdup((xmlChar *)ref->path);
2250
	goto done;
2251
    }
2252
    if (ref->path == NULL) {
2253
        ref->path = (char *) "/";
2254
	remove_path = 1;
2255
    }
2256

2257
    /*
2258
     * At this point (at last!) we can compare the two paths
2259
     *
2260
     * First we take care of the special case where either of the
2261
     * two path components may be missing (bug 316224)
2262
     */
2263
    bptr = (xmlChar *)bas->path;
2264
    {
2265
        xmlChar *rptr = (xmlChar *) ref->path;
2266
        int pos = 0;
2267

2268
        /*
2269
         * Next we compare the two strings and find where they first differ
2270
         */
2271
	if ((*rptr == '.') && (rptr[1] == '/'))
2272
            rptr += 2;
2273
	if ((*bptr == '.') && (bptr[1] == '/'))
2274
            bptr += 2;
2275
	else if ((*bptr == '/') && (*rptr != '/'))
2276
	    bptr++;
2277
	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2278
	    pos++;
2279

2280
	if (bptr[pos] == rptr[pos]) {
2281
	    val = xmlStrdup(BAD_CAST "");
2282
	    goto done;		/* (I can't imagine why anyone would do this) */
2283
	}
2284

2285
	/*
2286
	 * In URI, "back up" to the last '/' encountered.  This will be the
2287
	 * beginning of the "unique" suffix of URI
2288
	 */
2289
	ix = pos;
2290
	for (; ix > 0; ix--) {
2291
	    if (rptr[ix - 1] == '/')
2292
		break;
2293
	}
2294
	uptr = (xmlChar *)&rptr[ix];
2295

2296
	/*
2297
	 * In base, count the number of '/' from the differing point
2298
	 */
2299
	for (; bptr[ix] != 0; ix++) {
2300
	    if (bptr[ix] == '/')
2301
		nbslash++;
2302
	}
2303

2304
	/*
2305
	 * e.g: URI="foo/" base="foo/bar" -> "./"
2306
	 */
2307
	if (nbslash == 0 && !uptr[0]) {
2308
	    val = xmlStrdup(BAD_CAST "./");
2309
	    goto done;
2310
	}
2311

2312
	len = xmlStrlen (uptr) + 1;
2313
    }
2314

2315
    if (nbslash == 0) {
2316
	if (uptr != NULL)
2317
	    /* exception characters from xmlSaveUri */
2318
	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2319
	goto done;
2320
    }
2321

2322
    /*
2323
     * Allocate just enough space for the returned string -
2324
     * length of the remainder of the URI, plus enough space
2325
     * for the "../" groups, plus one for the terminator
2326
     */
2327
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2328
    if (val == NULL) {
2329
        xmlURIErrMemory("building relative URI\n");
2330
	goto done;
2331
    }
2332
    vptr = val;
2333
    /*
2334
     * Put in as many "../" as needed
2335
     */
2336
    for (; nbslash>0; nbslash--) {
2337
	*vptr++ = '.';
2338
	*vptr++ = '.';
2339
	*vptr++ = '/';
2340
    }
2341
    /*
2342
     * Finish up with the end of the URI
2343
     */
2344
    if (uptr != NULL) {
2345
        if ((vptr > val) && (len > 0) &&
2346
	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2347
	    memcpy (vptr, uptr + 1, len - 1);
2348
	    vptr[len - 2] = 0;
2349
	} else {
2350
	    memcpy (vptr, uptr, len);
2351
	    vptr[len - 1] = 0;
2352
	}
2353
    } else {
2354
	vptr[len - 1] = 0;
2355
    }
2356

2357
    /* escape the freshly-built path */
2358
    vptr = val;
2359
	/* exception characters from xmlSaveUri */
2360
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2361
    xmlFree(vptr);
2362

2363
done:
2364
    /*
2365
     * Free the working variables
2366
     */
2367
    if (remove_path != 0)
2368
        ref->path = NULL;
2369
    if (ref != NULL)
2370
	xmlFreeURI (ref);
2371
    if (bas != NULL)
2372
	xmlFreeURI (bas);
2373

2374
    return val;
2375
}
2376

2377
/**
2378
 * xmlCanonicPath:
2379
 * @path:  the resource locator in a filesystem notation
2380
 *
2381
 * Constructs a canonic path from the specified path.
2382
 *
2383
 * Returns a new canonic path, or a duplicate of the path parameter if the
2384
 * construction fails. The caller is responsible for freeing the memory occupied
2385
 * by the returned string. If there is insufficient memory available, or the
2386
 * argument is NULL, the function returns NULL.
2387
 */
2388
#define IS_WINDOWS_PATH(p)					\
2389
	((p != NULL) &&						\
2390
	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2391
	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2392
	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2393
xmlChar *
2394
xmlCanonicPath(const xmlChar *path)
2395
{
2396
/*
2397
 * For Windows implementations, additional work needs to be done to
2398
 * replace backslashes in pathnames with "forward slashes"
2399
 */
2400
#if defined(_WIN32)
2401
    int len = 0;
2402
    char *p = NULL;
2403
#endif
2404
    xmlURIPtr uri;
2405
    xmlChar *ret;
2406
    const xmlChar *absuri;
2407

2408
    if (path == NULL)
2409
	return(NULL);
2410

2411
#if defined(_WIN32)
2412
    /*
2413
     * We must not change the backslashes to slashes if the the path
2414
     * starts with \\?\
2415
     * Those paths can be up to 32k characters long.
2416
     * Was added specifically for OpenOffice, those paths can't be converted
2417
     * to URIs anyway.
2418
     */
2419
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2420
        (path[3] == '\\') )
2421
	return xmlStrdup((const xmlChar *) path);
2422
#endif
2423

2424
	/* sanitize filename starting with // so it can be used as URI */
2425
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2426
        path++;
2427

2428
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2429
	xmlFreeURI(uri);
2430
	return xmlStrdup(path);
2431
    }
2432

2433
    /* Check if this is an "absolute uri" */
2434
    absuri = xmlStrstr(path, BAD_CAST "://");
2435
    if (absuri != NULL) {
2436
        int l, j;
2437
	unsigned char c;
2438
	xmlChar *escURI;
2439

2440
        /*
2441
	 * this looks like an URI where some parts have not been
2442
	 * escaped leading to a parsing problem.  Check that the first
2443
	 * part matches a protocol.
2444
	 */
2445
	l = absuri - path;
2446
	/* Bypass if first part (part before the '://') is > 20 chars */
2447
	if ((l <= 0) || (l > 20))
2448
	    goto path_processing;
2449
	/* Bypass if any non-alpha characters are present in first part */
2450
	for (j = 0;j < l;j++) {
2451
	    c = path[j];
2452
	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2453
	        goto path_processing;
2454
	}
2455

2456
	/* Escape all except the characters specified in the supplied path */
2457
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2458
	if (escURI != NULL) {
2459
	    /* Try parsing the escaped path */
2460
	    uri = xmlParseURI((const char *) escURI);
2461
	    /* If successful, return the escaped string */
2462
	    if (uri != NULL) {
2463
	        xmlFreeURI(uri);
2464
		return escURI;
2465
	    }
2466
            xmlFree(escURI);
2467
	}
2468
    }
2469

2470
path_processing:
2471
/* For Windows implementations, replace backslashes with 'forward slashes' */
2472
#if defined(_WIN32)
2473
    /*
2474
     * Create a URI structure
2475
     */
2476
    uri = xmlCreateURI();
2477
    if (uri == NULL) {		/* Guard against 'out of memory' */
2478
        return(NULL);
2479
    }
2480

2481
    len = xmlStrlen(path);
2482
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2483
        /* make the scheme 'file' */
2484
	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2485
	/* allocate space for leading '/' + path + string terminator */
2486
	uri->path = xmlMallocAtomic(len + 2);
2487
	if (uri->path == NULL) {
2488
	    xmlFreeURI(uri);	/* Guard against 'out of memory' */
2489
	    return(NULL);
2490
	}
2491
	/* Put in leading '/' plus path */
2492
	uri->path[0] = '/';
2493
	p = uri->path + 1;
2494
	strncpy(p, (char *) path, len + 1);
2495
    } else {
2496
	uri->path = (char *) xmlStrdup(path);
2497
	if (uri->path == NULL) {
2498
	    xmlFreeURI(uri);
2499
	    return(NULL);
2500
	}
2501
	p = uri->path;
2502
    }
2503
    /* Now change all occurrences of '\' to '/' */
2504
    while (*p != '\0') {
2505
	if (*p == '\\')
2506
	    *p = '/';
2507
	p++;
2508
    }
2509

2510
    if (uri->scheme == NULL) {
2511
	ret = xmlStrdup((const xmlChar *) uri->path);
2512
    } else {
2513
	ret = xmlSaveUri(uri);
2514
    }
2515

2516
    xmlFreeURI(uri);
2517
#else
2518
    ret = xmlStrdup((const xmlChar *) path);
2519
#endif
2520
    return(ret);
2521
}
2522

2523
/**
2524
 * xmlPathToURI:
2525
 * @path:  the resource locator in a filesystem notation
2526
 *
2527
 * Constructs an URI expressing the existing path
2528
 *
2529
 * Returns a new URI, or a duplicate of the path parameter if the
2530
 * construction fails. The caller is responsible for freeing the memory
2531
 * occupied by the returned string. If there is insufficient memory available,
2532
 * or the argument is NULL, the function returns NULL.
2533
 */
2534
xmlChar *
2535
xmlPathToURI(const xmlChar *path)
2536
{
2537
    xmlURIPtr uri;
2538
    xmlURI temp;
2539
    xmlChar *ret, *cal;
2540

2541
    if (path == NULL)
2542
        return(NULL);
2543

2544
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2545
	xmlFreeURI(uri);
2546
	return xmlStrdup(path);
2547
    }
2548
    cal = xmlCanonicPath(path);
2549
    if (cal == NULL)
2550
        return(NULL);
2551
#if defined(_WIN32)
2552
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2553
       If 'cal' is a valid URI already then we are done here, as continuing would make
2554
       it invalid. */
2555
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2556
	xmlFreeURI(uri);
2557
	return cal;
2558
    }
2559
    /* 'cal' can contain a relative path with backslashes. If that is processed
2560
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2561
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2562
    ret = cal;
2563
    while (*ret != '\0') {
2564
	if (*ret == '\\')
2565
	    *ret = '/';
2566
	ret++;
2567
    }
2568
#endif
2569
    memset(&temp, 0, sizeof(temp));
2570
    temp.path = (char *) cal;
2571
    ret = xmlSaveUri(&temp);
2572
    xmlFree(cal);
2573
    return(ret);
2574
}
2575

2576
Product

Resources

Company