Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/uri.c
4389 views
1
/**
2
* uri.c: set of generic URI related routines
3
*
4
* Reference: RFCs 3986, 2732 and 2373
5
*
6
* See Copyright for the status of this software.
7
*
8
* daniel@veillard.com
9
*/
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <limits.h>
15
#include <string.h>
16
17
#include <libxml/xmlmemory.h>
18
#include <libxml/uri.h>
19
#include <libxml/xmlerror.h>
20
21
#include "private/error.h"
22
23
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * inside larger expressions (e.g. "x % MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Values stored in the port field of an xmlURI. */
#define PORT_EMPTY           0
#define PORT_EMPTY_SERVER    (-1)
39
40
static void
41
xmlURIErrMemory(const char *extra)
42
{
43
if (extra)
44
__xmlRaiseError(NULL, NULL, NULL,
45
NULL, NULL, XML_FROM_URI,
46
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
47
extra, NULL, NULL, 0, 0,
48
"Memory allocation failed : %s\n", extra);
49
else
50
__xmlRaiseError(NULL, NULL, NULL,
51
NULL, NULL, XML_FROM_URI,
52
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
53
NULL, NULL, NULL, 0, 0,
54
"Memory allocation failed\n");
55
}
56
57
static void xmlCleanURI(xmlURIPtr uri);
58
59
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 *
 * All of these macros take a character value, except IS_UNWISE which
 * takes a pointer to the character. Arguments may be evaluated more
 * than once, so they must be free of side effects.
 */

/* lowalpha = "a" | "b" | ... | "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" | "B" | ... | "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" | "1" | ... | "9" (overrides any earlier IS_DIGIT) */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                              \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||            \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||            \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * Note: this one takes a pointer to the character.
 */
#define IS_UNWISE(p)                                            \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||         \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||        \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                          \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||            \
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||            \
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||            \
     ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
126
127
/*
 * Advance the pointer p to the next character: three bytes when it
 * points at '%' (an escape sequence), one byte otherwise.
 * p must be a modifiable lvalue and is evaluated more than once; it is
 * parenthesized so that lvalues such as "*pp" advance the intended
 * object.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Copy the first n bytes of s via xmlStrndup() and return them as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
144
145
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_* macros take a pointer to the character to classify and may
 * evaluate it more than once, so the argument must be free of side
 * effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||		\
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||		\
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only read once the previous test has
 * succeeded, so a NUL terminator stops the lookahead. The argument is
 * parenthesized before the offset is added so that expressions such as
 * "c ? a : b" are classified correctly.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
200
201
/**
202
* xmlParse3986Scheme:
203
* @uri: pointer to an URI structure
204
* @str: pointer to the string to analyze
205
*
206
* Parse an URI scheme
207
*
208
* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209
*
210
* Returns 0 or the error code
211
*/
212
static int
213
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
214
const char *cur;
215
216
if (str == NULL)
217
return(-1);
218
219
cur = *str;
220
if (!ISA_ALPHA(cur))
221
return(2);
222
cur++;
223
while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
224
(*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
225
if (uri != NULL) {
226
if (uri->scheme != NULL) xmlFree(uri->scheme);
227
uri->scheme = STRNDUP(*str, cur - *str);
228
}
229
*str = cur;
230
return(0);
231
}
232
233
/**
234
* xmlParse3986Fragment:
235
* @uri: pointer to an URI structure
236
* @str: pointer to the string to analyze
237
*
238
* Parse the query part of an URI
239
*
240
* fragment = *( pchar / "/" / "?" )
241
* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
242
* in the fragment identifier but this is used very broadly for
243
* xpointer scheme selection, so we are allowing it here to not break
244
* for example all the DocBook processing chains.
245
*
246
* Returns 0 or the error code
247
*/
248
static int
249
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
250
{
251
const char *cur;
252
253
if (str == NULL)
254
return (-1);
255
256
cur = *str;
257
258
while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
259
(*cur == '[') || (*cur == ']') ||
260
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261
NEXT(cur);
262
if (uri != NULL) {
263
if (uri->fragment != NULL)
264
xmlFree(uri->fragment);
265
if (uri->cleanup & 2)
266
uri->fragment = STRNDUP(*str, cur - *str);
267
else
268
uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
269
}
270
*str = cur;
271
return (0);
272
}
273
274
/**
275
* xmlParse3986Query:
276
* @uri: pointer to an URI structure
277
* @str: pointer to the string to analyze
278
*
279
* Parse the query part of an URI
280
*
281
* query = *uric
282
*
283
* Returns 0 or the error code
284
*/
285
static int
286
xmlParse3986Query(xmlURIPtr uri, const char **str)
287
{
288
const char *cur;
289
290
if (str == NULL)
291
return (-1);
292
293
cur = *str;
294
295
while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
296
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
297
NEXT(cur);
298
if (uri != NULL) {
299
if (uri->query != NULL)
300
xmlFree(uri->query);
301
if (uri->cleanup & 2)
302
uri->query = STRNDUP(*str, cur - *str);
303
else
304
uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
305
306
/* Save the raw bytes of the query as well.
307
* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
308
*/
309
if (uri->query_raw != NULL)
310
xmlFree (uri->query_raw);
311
uri->query_raw = STRNDUP (*str, cur - *str);
312
}
313
*str = cur;
314
return (0);
315
}
316
317
/**
318
* xmlParse3986Port:
319
* @uri: pointer to an URI structure
320
* @str: the string to analyze
321
*
322
* Parse a port part and fills in the appropriate fields
323
* of the @uri structure
324
*
325
* port = *DIGIT
326
*
327
* Returns 0 or the error code
328
*/
329
static int
330
xmlParse3986Port(xmlURIPtr uri, const char **str)
331
{
332
const char *cur = *str;
333
int port = 0;
334
335
if (ISA_DIGIT(cur)) {
336
while (ISA_DIGIT(cur)) {
337
int digit = *cur - '0';
338
339
if (port > INT_MAX / 10)
340
return(1);
341
port *= 10;
342
if (port > INT_MAX - digit)
343
return(1);
344
port += digit;
345
346
cur++;
347
}
348
if (uri != NULL)
349
uri->port = port;
350
*str = cur;
351
return(0);
352
}
353
return(1);
354
}
355
356
/**
357
* xmlParse3986Userinfo:
358
* @uri: pointer to an URI structure
359
* @str: the string to analyze
360
*
361
* Parse an user information part and fills in the appropriate fields
362
* of the @uri structure
363
*
364
* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
365
*
366
* Returns 0 or the error code
367
*/
368
static int
369
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
370
{
371
const char *cur;
372
373
cur = *str;
374
while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
375
ISA_SUB_DELIM(cur) || (*cur == ':'))
376
NEXT(cur);
377
if (*cur == '@') {
378
if (uri != NULL) {
379
if (uri->user != NULL) xmlFree(uri->user);
380
if (uri->cleanup & 2)
381
uri->user = STRNDUP(*str, cur - *str);
382
else
383
uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
384
}
385
*str = cur;
386
return(0);
387
}
388
return(1);
389
}
390
391
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. *@str is only advanced on success. At most three
 * characters are examined, and a character is only read once all the
 * previous ones were digits, so the NUL terminator is never passed.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int second;  /* cur[1] is a digit */
    int third;   /* cur[1] and cur[2] are both digits */

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);
    second = (cur[1] >= '0') && (cur[1] <= '9');
    third = second && (cur[2] >= '0') && (cur[2] <= '9');

    if (!second) {
        cur += 1;                               /* 0-9 */
    } else if (!third) {
        if (cur[0] == '0')                      /* no leading zero */
            return(1);
        cur += 2;                               /* 10-99 */
    } else if (cur[0] == '1') {
        cur += 3;                               /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        cur += 3;                               /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* the upper bound applies to the third digit: 250-255 */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
428
/**
429
* xmlParse3986Host:
430
* @uri: pointer to an URI structure
431
* @str: the string to analyze
432
*
433
* Parse an host part and fills in the appropriate fields
434
* of the @uri structure
435
*
436
* host = IP-literal / IPv4address / reg-name
437
* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
438
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
439
* reg-name = *( unreserved / pct-encoded / sub-delims )
440
*
441
* Returns 0 or the error code
442
*/
443
static int
444
xmlParse3986Host(xmlURIPtr uri, const char **str)
445
{
446
const char *cur = *str;
447
const char *host;
448
449
host = cur;
450
/*
451
* IPv6 and future addressing scheme are enclosed between brackets
452
*/
453
if (*cur == '[') {
454
cur++;
455
while ((*cur != ']') && (*cur != 0))
456
cur++;
457
if (*cur != ']')
458
return(1);
459
cur++;
460
goto found;
461
}
462
/*
463
* try to parse an IPv4
464
*/
465
if (ISA_DIGIT(cur)) {
466
if (xmlParse3986DecOctet(&cur) != 0)
467
goto not_ipv4;
468
if (*cur != '.')
469
goto not_ipv4;
470
cur++;
471
if (xmlParse3986DecOctet(&cur) != 0)
472
goto not_ipv4;
473
if (*cur != '.')
474
goto not_ipv4;
475
if (xmlParse3986DecOctet(&cur) != 0)
476
goto not_ipv4;
477
if (*cur != '.')
478
goto not_ipv4;
479
if (xmlParse3986DecOctet(&cur) != 0)
480
goto not_ipv4;
481
goto found;
482
not_ipv4:
483
cur = *str;
484
}
485
/*
486
* then this should be a hostname which can be empty
487
*/
488
while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
489
NEXT(cur);
490
found:
491
if (uri != NULL) {
492
if (uri->authority != NULL) xmlFree(uri->authority);
493
uri->authority = NULL;
494
if (uri->server != NULL) xmlFree(uri->server);
495
if (cur != host) {
496
if (uri->cleanup & 2)
497
uri->server = STRNDUP(host, cur - host);
498
else
499
uri->server = xmlURIUnescapeString(host, cur - host, NULL);
500
} else
501
uri->server = NULL;
502
}
503
*str = cur;
504
return(0);
505
}
506
507
/**
508
* xmlParse3986Authority:
509
* @uri: pointer to an URI structure
510
* @str: the string to analyze
511
*
512
* Parse an authority part and fills in the appropriate fields
513
* of the @uri structure
514
*
515
* authority = [ userinfo "@" ] host [ ":" port ]
516
*
517
* Returns 0 or the error code
518
*/
519
static int
520
xmlParse3986Authority(xmlURIPtr uri, const char **str)
521
{
522
const char *cur;
523
int ret;
524
525
cur = *str;
526
/*
527
* try to parse an userinfo and check for the trailing @
528
*/
529
ret = xmlParse3986Userinfo(uri, &cur);
530
if ((ret != 0) || (*cur != '@'))
531
cur = *str;
532
else
533
cur++;
534
ret = xmlParse3986Host(uri, &cur);
535
if (ret != 0) return(ret);
536
if (*cur == ':') {
537
cur++;
538
ret = xmlParse3986Port(uri, &cur);
539
if (ret != 0) return(ret);
540
}
541
*str = cur;
542
return(0);
543
}
544
545
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, advancing *@str past it. Nothing is stored;
 * callers copy the path themselves.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /*
     * A segment starting with the forbidden character is empty too:
     * it must not be reported as a valid non-empty segment.
     */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
577
578
/**
579
* xmlParse3986PathAbEmpty:
580
* @uri: pointer to an URI structure
581
* @str: the string to analyze
582
*
583
* Parse an path absolute or empty and fills in the appropriate fields
584
* of the @uri structure
585
*
586
* path-abempty = *( "/" segment )
587
*
588
* Returns 0 or the error code
589
*/
590
static int
591
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
592
{
593
const char *cur;
594
int ret;
595
596
cur = *str;
597
598
while (*cur == '/') {
599
cur++;
600
ret = xmlParse3986Segment(&cur, 0, 1);
601
if (ret != 0) return(ret);
602
}
603
if (uri != NULL) {
604
if (uri->path != NULL) xmlFree(uri->path);
605
if (*str != cur) {
606
if (uri->cleanup & 2)
607
uri->path = STRNDUP(*str, cur - *str);
608
else
609
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
610
} else {
611
uri->path = NULL;
612
}
613
}
614
*str = cur;
615
return (0);
616
}
617
618
/**
619
* xmlParse3986PathAbsolute:
620
* @uri: pointer to an URI structure
621
* @str: the string to analyze
622
*
623
* Parse an path absolute and fills in the appropriate fields
624
* of the @uri structure
625
*
626
* path-absolute = "/" [ segment-nz *( "/" segment ) ]
627
*
628
* Returns 0 or the error code
629
*/
630
static int
631
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
632
{
633
const char *cur;
634
int ret;
635
636
cur = *str;
637
638
if (*cur != '/')
639
return(1);
640
cur++;
641
ret = xmlParse3986Segment(&cur, 0, 0);
642
if (ret == 0) {
643
while (*cur == '/') {
644
cur++;
645
ret = xmlParse3986Segment(&cur, 0, 1);
646
if (ret != 0) return(ret);
647
}
648
}
649
if (uri != NULL) {
650
if (uri->path != NULL) xmlFree(uri->path);
651
if (cur != *str) {
652
if (uri->cleanup & 2)
653
uri->path = STRNDUP(*str, cur - *str);
654
else
655
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656
} else {
657
uri->path = NULL;
658
}
659
}
660
*str = cur;
661
return (0);
662
}
663
664
/**
665
* xmlParse3986PathRootless:
666
* @uri: pointer to an URI structure
667
* @str: the string to analyze
668
*
669
* Parse an path without root and fills in the appropriate fields
670
* of the @uri structure
671
*
672
* path-rootless = segment-nz *( "/" segment )
673
*
674
* Returns 0 or the error code
675
*/
676
static int
677
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
678
{
679
const char *cur;
680
int ret;
681
682
cur = *str;
683
684
ret = xmlParse3986Segment(&cur, 0, 0);
685
if (ret != 0) return(ret);
686
while (*cur == '/') {
687
cur++;
688
ret = xmlParse3986Segment(&cur, 0, 1);
689
if (ret != 0) return(ret);
690
}
691
if (uri != NULL) {
692
if (uri->path != NULL) xmlFree(uri->path);
693
if (cur != *str) {
694
if (uri->cleanup & 2)
695
uri->path = STRNDUP(*str, cur - *str);
696
else
697
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698
} else {
699
uri->path = NULL;
700
}
701
}
702
*str = cur;
703
return (0);
704
}
705
706
/**
707
* xmlParse3986PathNoScheme:
708
* @uri: pointer to an URI structure
709
* @str: the string to analyze
710
*
711
* Parse an path which is not a scheme and fills in the appropriate fields
712
* of the @uri structure
713
*
714
* path-noscheme = segment-nz-nc *( "/" segment )
715
*
716
* Returns 0 or the error code
717
*/
718
static int
719
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
720
{
721
const char *cur;
722
int ret;
723
724
cur = *str;
725
726
ret = xmlParse3986Segment(&cur, ':', 0);
727
if (ret != 0) return(ret);
728
while (*cur == '/') {
729
cur++;
730
ret = xmlParse3986Segment(&cur, 0, 1);
731
if (ret != 0) return(ret);
732
}
733
if (uri != NULL) {
734
if (uri->path != NULL) xmlFree(uri->path);
735
if (cur != *str) {
736
if (uri->cleanup & 2)
737
uri->path = STRNDUP(*str, cur - *str);
738
else
739
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
740
} else {
741
uri->path = NULL;
742
}
743
}
744
*str = cur;
745
return (0);
746
}
747
748
/**
749
* xmlParse3986HierPart:
750
* @uri: pointer to an URI structure
751
* @str: the string to analyze
752
*
753
* Parse an hierarchical part and fills in the appropriate fields
754
* of the @uri structure
755
*
756
* hier-part = "//" authority path-abempty
757
* / path-absolute
758
* / path-rootless
759
* / path-empty
760
*
761
* Returns 0 or the error code
762
*/
763
static int
764
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
765
{
766
const char *cur;
767
int ret;
768
769
cur = *str;
770
771
if ((*cur == '/') && (*(cur + 1) == '/')) {
772
cur += 2;
773
ret = xmlParse3986Authority(uri, &cur);
774
if (ret != 0) return(ret);
775
/*
776
* An empty server is marked with a special URI value.
777
*/
778
if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
779
uri->port = PORT_EMPTY_SERVER;
780
ret = xmlParse3986PathAbEmpty(uri, &cur);
781
if (ret != 0) return(ret);
782
*str = cur;
783
return(0);
784
} else if (*cur == '/') {
785
ret = xmlParse3986PathAbsolute(uri, &cur);
786
if (ret != 0) return(ret);
787
} else if (ISA_PCHAR(cur)) {
788
ret = xmlParse3986PathRootless(uri, &cur);
789
if (ret != 0) return(ret);
790
} else {
791
/* path-empty is effectively empty */
792
if (uri != NULL) {
793
if (uri->path != NULL) xmlFree(uri->path);
794
uri->path = NULL;
795
}
796
}
797
*str = cur;
798
return (0);
799
}
800
801
/**
802
* xmlParse3986RelativeRef:
803
* @uri: pointer to an URI structure
804
* @str: the string to analyze
805
*
806
* Parse an URI string and fills in the appropriate fields
807
* of the @uri structure
808
*
809
* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
810
* relative-part = "//" authority path-abempty
811
* / path-absolute
812
* / path-noscheme
813
* / path-empty
814
*
815
* Returns 0 or the error code
816
*/
817
static int
818
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
819
int ret;
820
821
if ((*str == '/') && (*(str + 1) == '/')) {
822
str += 2;
823
ret = xmlParse3986Authority(uri, &str);
824
if (ret != 0) return(ret);
825
ret = xmlParse3986PathAbEmpty(uri, &str);
826
if (ret != 0) return(ret);
827
} else if (*str == '/') {
828
ret = xmlParse3986PathAbsolute(uri, &str);
829
if (ret != 0) return(ret);
830
} else if (ISA_PCHAR(str)) {
831
ret = xmlParse3986PathNoScheme(uri, &str);
832
if (ret != 0) return(ret);
833
} else {
834
/* path-empty is effectively empty */
835
if (uri != NULL) {
836
if (uri->path != NULL) xmlFree(uri->path);
837
uri->path = NULL;
838
}
839
}
840
841
if (*str == '?') {
842
str++;
843
ret = xmlParse3986Query(uri, &str);
844
if (ret != 0) return(ret);
845
}
846
if (*str == '#') {
847
str++;
848
ret = xmlParse3986Fragment(uri, &str);
849
if (ret != 0) return(ret);
850
}
851
if (*str != 0) {
852
xmlCleanURI(uri);
853
return(1);
854
}
855
return(0);
856
}
857
858
859
/**
860
* xmlParse3986URI:
861
* @uri: pointer to an URI structure
862
* @str: the string to analyze
863
*
864
* Parse an URI string and fills in the appropriate fields
865
* of the @uri structure
866
*
867
* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
868
*
869
* Returns 0 or the error code
870
*/
871
static int
872
xmlParse3986URI(xmlURIPtr uri, const char *str) {
873
int ret;
874
875
ret = xmlParse3986Scheme(uri, &str);
876
if (ret != 0) return(ret);
877
if (*str != ':') {
878
return(1);
879
}
880
str++;
881
ret = xmlParse3986HierPart(uri, &str);
882
if (ret != 0) return(ret);
883
if (*str == '?') {
884
str++;
885
ret = xmlParse3986Query(uri, &str);
886
if (ret != 0) return(ret);
887
}
888
if (*str == '#') {
889
str++;
890
ret = xmlParse3986Fragment(uri, &str);
891
if (ret != 0) return(ret);
892
}
893
if (*str != 0) {
894
xmlCleanURI(uri);
895
return(1);
896
}
897
return(0);
898
}
899
900
/**
901
* xmlParse3986URIReference:
902
* @uri: pointer to an URI structure
903
* @str: the string to analyze
904
*
905
* Parse an URI reference string and fills in the appropriate fields
906
* of the @uri structure
907
*
908
* URI-reference = URI / relative-ref
909
*
910
* Returns 0 or the error code
911
*/
912
static int
913
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
914
int ret;
915
916
if (str == NULL)
917
return(-1);
918
xmlCleanURI(uri);
919
920
/*
921
* Try first to parse absolute refs, then fallback to relative if
922
* it fails.
923
*/
924
ret = xmlParse3986URI(uri, str);
925
if (ret != 0) {
926
xmlCleanURI(uri);
927
ret = xmlParse3986RelativeRef(uri, str);
928
if (ret != 0) {
929
xmlCleanURI(uri);
930
return(ret);
931
}
932
}
933
return(0);
934
}
935
936
/**
937
* xmlParseURI:
938
* @str: the URI string to analyze
939
*
940
* Parse an URI based on RFC 3986
941
*
942
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
943
*
944
* Returns a newly built xmlURIPtr or NULL in case of error
945
*/
946
xmlURIPtr
947
xmlParseURI(const char *str) {
948
xmlURIPtr uri;
949
int ret;
950
951
if (str == NULL)
952
return(NULL);
953
uri = xmlCreateURI();
954
if (uri != NULL) {
955
ret = xmlParse3986URIReference(uri, str);
956
if (ret) {
957
xmlFreeURI(uri);
958
return(NULL);
959
}
960
}
961
return(uri);
962
}
963
964
/**
965
* xmlParseURIReference:
966
* @uri: pointer to an URI structure
967
* @str: the string to analyze
968
*
969
* Parse an URI reference string based on RFC 3986 and fills in the
970
* appropriate fields of the @uri structure
971
*
972
* URI-reference = URI / relative-ref
973
*
974
* Returns 0 or the error code
975
*/
976
int
977
xmlParseURIReference(xmlURIPtr uri, const char *str) {
978
return(xmlParse3986URIReference(uri, str));
979
}
980
981
/**
982
* xmlParseURIRaw:
983
* @str: the URI string to analyze
984
* @raw: if 1 unescaping of URI pieces are disabled
985
*
986
* Parse an URI but allows to keep intact the original fragments.
987
*
988
* URI-reference = URI / relative-ref
989
*
990
* Returns a newly built xmlURIPtr or NULL in case of error
991
*/
992
xmlURIPtr
993
xmlParseURIRaw(const char *str, int raw) {
994
xmlURIPtr uri;
995
int ret;
996
997
if (str == NULL)
998
return(NULL);
999
uri = xmlCreateURI();
1000
if (uri != NULL) {
1001
if (raw) {
1002
uri->cleanup |= 2;
1003
}
1004
ret = xmlParseURIReference(uri, str);
1005
if (ret) {
1006
xmlFreeURI(uri);
1007
return(NULL);
1008
}
1009
}
1010
return(uri);
1011
}
1012
1013
/************************************************************************
1014
* *
1015
* Generic URI structure functions *
1016
* *
1017
************************************************************************/
1018
1019
/**
1020
* xmlCreateURI:
1021
*
1022
* Simply creates an empty xmlURI
1023
*
1024
* Returns the new structure or NULL in case of error
1025
*/
1026
xmlURIPtr
1027
xmlCreateURI(void) {
1028
xmlURIPtr ret;
1029
1030
ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1031
if (ret == NULL) {
1032
xmlURIErrMemory("creating URI structure\n");
1033
return(NULL);
1034
}
1035
memset(ret, 0, sizeof(xmlURI));
1036
ret->port = PORT_EMPTY;
1037
return(ret);
1038
}
1039
1040
/**
1041
* xmlSaveUriRealloc:
1042
*
1043
* Function to handle properly a reallocation when saving an URI
1044
* Also imposes some limit on the length of an URI string output
1045
*/
1046
static xmlChar *
1047
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1048
xmlChar *temp;
1049
int tmp;
1050
1051
if (*max > MAX_URI_LENGTH) {
1052
xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1053
return(NULL);
1054
}
1055
tmp = *max * 2;
1056
temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1057
if (temp == NULL) {
1058
xmlURIErrMemory("saving URI\n");
1059
return(NULL);
1060
}
1061
*max = tmp;
1062
return(temp);
1063
}
1064
1065
/**
1066
* xmlSaveUri:
1067
* @uri: pointer to an xmlURI
1068
*
1069
* Save the URI as an escaped string
1070
*
1071
* Returns a new string (to be deallocated by caller)
1072
*/
1073
xmlChar *
1074
xmlSaveUri(xmlURIPtr uri) {
1075
xmlChar *ret = NULL;
1076
xmlChar *temp;
1077
const char *p;
1078
int len;
1079
int max;
1080
1081
if (uri == NULL) return(NULL);
1082
1083
1084
max = 80;
1085
ret = (xmlChar *) xmlMallocAtomic(max + 1);
1086
if (ret == NULL) {
1087
xmlURIErrMemory("saving URI\n");
1088
return(NULL);
1089
}
1090
len = 0;
1091
1092
if (uri->scheme != NULL) {
1093
p = uri->scheme;
1094
while (*p != 0) {
1095
if (len >= max) {
1096
temp = xmlSaveUriRealloc(ret, &max);
1097
if (temp == NULL) goto mem_error;
1098
ret = temp;
1099
}
1100
ret[len++] = *p++;
1101
}
1102
if (len >= max) {
1103
temp = xmlSaveUriRealloc(ret, &max);
1104
if (temp == NULL) goto mem_error;
1105
ret = temp;
1106
}
1107
ret[len++] = ':';
1108
}
1109
if (uri->opaque != NULL) {
1110
p = uri->opaque;
1111
while (*p != 0) {
1112
if (len + 3 >= max) {
1113
temp = xmlSaveUriRealloc(ret, &max);
1114
if (temp == NULL) goto mem_error;
1115
ret = temp;
1116
}
1117
if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1118
ret[len++] = *p++;
1119
else {
1120
int val = *(unsigned char *)p++;
1121
int hi = val / 0x10, lo = val % 0x10;
1122
ret[len++] = '%';
1123
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1124
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1125
}
1126
}
1127
} else {
1128
if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1129
if (len + 3 >= max) {
1130
temp = xmlSaveUriRealloc(ret, &max);
1131
if (temp == NULL) goto mem_error;
1132
ret = temp;
1133
}
1134
ret[len++] = '/';
1135
ret[len++] = '/';
1136
if (uri->user != NULL) {
1137
p = uri->user;
1138
while (*p != 0) {
1139
if (len + 3 >= max) {
1140
temp = xmlSaveUriRealloc(ret, &max);
1141
if (temp == NULL) goto mem_error;
1142
ret = temp;
1143
}
1144
if ((IS_UNRESERVED(*(p))) ||
1145
((*(p) == ';')) || ((*(p) == ':')) ||
1146
((*(p) == '&')) || ((*(p) == '=')) ||
1147
((*(p) == '+')) || ((*(p) == '$')) ||
1148
((*(p) == ',')))
1149
ret[len++] = *p++;
1150
else {
1151
int val = *(unsigned char *)p++;
1152
int hi = val / 0x10, lo = val % 0x10;
1153
ret[len++] = '%';
1154
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156
}
1157
}
1158
if (len + 3 >= max) {
1159
temp = xmlSaveUriRealloc(ret, &max);
1160
if (temp == NULL) goto mem_error;
1161
ret = temp;
1162
}
1163
ret[len++] = '@';
1164
}
1165
if (uri->server != NULL) {
1166
p = uri->server;
1167
while (*p != 0) {
1168
if (len >= max) {
1169
temp = xmlSaveUriRealloc(ret, &max);
1170
if (temp == NULL) goto mem_error;
1171
ret = temp;
1172
}
1173
/* TODO: escaping? */
1174
ret[len++] = (xmlChar) *p++;
1175
}
1176
}
1177
if (uri->port > 0) {
1178
if (len + 10 >= max) {
1179
temp = xmlSaveUriRealloc(ret, &max);
1180
if (temp == NULL) goto mem_error;
1181
ret = temp;
1182
}
1183
len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1184
}
1185
} else if (uri->authority != NULL) {
1186
if (len + 3 >= max) {
1187
temp = xmlSaveUriRealloc(ret, &max);
1188
if (temp == NULL) goto mem_error;
1189
ret = temp;
1190
}
1191
ret[len++] = '/';
1192
ret[len++] = '/';
1193
p = uri->authority;
1194
while (*p != 0) {
1195
if (len + 3 >= max) {
1196
temp = xmlSaveUriRealloc(ret, &max);
1197
if (temp == NULL) goto mem_error;
1198
ret = temp;
1199
}
1200
if ((IS_UNRESERVED(*(p))) ||
1201
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1202
((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1203
((*(p) == '=')) || ((*(p) == '+')))
1204
ret[len++] = *p++;
1205
else {
1206
int val = *(unsigned char *)p++;
1207
int hi = val / 0x10, lo = val % 0x10;
1208
ret[len++] = '%';
1209
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1210
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1211
}
1212
}
1213
} else if (uri->scheme != NULL) {
1214
if (len + 3 >= max) {
1215
temp = xmlSaveUriRealloc(ret, &max);
1216
if (temp == NULL) goto mem_error;
1217
ret = temp;
1218
}
1219
}
1220
if (uri->path != NULL) {
1221
p = uri->path;
1222
/*
1223
* the colon in file:///d: should not be escaped or
1224
* Windows accesses fail later.
1225
*/
1226
if ((uri->scheme != NULL) &&
1227
(p[0] == '/') &&
1228
(((p[1] >= 'a') && (p[1] <= 'z')) ||
1229
((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1230
(p[2] == ':') &&
1231
(xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1232
if (len + 3 >= max) {
1233
temp = xmlSaveUriRealloc(ret, &max);
1234
if (temp == NULL) goto mem_error;
1235
ret = temp;
1236
}
1237
ret[len++] = *p++;
1238
ret[len++] = *p++;
1239
ret[len++] = *p++;
1240
}
1241
while (*p != 0) {
1242
if (len + 3 >= max) {
1243
temp = xmlSaveUriRealloc(ret, &max);
1244
if (temp == NULL) goto mem_error;
1245
ret = temp;
1246
}
1247
if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1248
((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1250
((*(p) == ',')))
1251
ret[len++] = *p++;
1252
else {
1253
int val = *(unsigned char *)p++;
1254
int hi = val / 0x10, lo = val % 0x10;
1255
ret[len++] = '%';
1256
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1257
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1258
}
1259
}
1260
}
1261
if (uri->query_raw != NULL) {
1262
if (len + 1 >= max) {
1263
temp = xmlSaveUriRealloc(ret, &max);
1264
if (temp == NULL) goto mem_error;
1265
ret = temp;
1266
}
1267
ret[len++] = '?';
1268
p = uri->query_raw;
1269
while (*p != 0) {
1270
if (len + 1 >= max) {
1271
temp = xmlSaveUriRealloc(ret, &max);
1272
if (temp == NULL) goto mem_error;
1273
ret = temp;
1274
}
1275
ret[len++] = *p++;
1276
}
1277
} else if (uri->query != NULL) {
1278
if (len + 3 >= max) {
1279
temp = xmlSaveUriRealloc(ret, &max);
1280
if (temp == NULL) goto mem_error;
1281
ret = temp;
1282
}
1283
ret[len++] = '?';
1284
p = uri->query;
1285
while (*p != 0) {
1286
if (len + 3 >= max) {
1287
temp = xmlSaveUriRealloc(ret, &max);
1288
if (temp == NULL) goto mem_error;
1289
ret = temp;
1290
}
1291
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1292
ret[len++] = *p++;
1293
else {
1294
int val = *(unsigned char *)p++;
1295
int hi = val / 0x10, lo = val % 0x10;
1296
ret[len++] = '%';
1297
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1298
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1299
}
1300
}
1301
}
1302
}
1303
if (uri->fragment != NULL) {
1304
if (len + 3 >= max) {
1305
temp = xmlSaveUriRealloc(ret, &max);
1306
if (temp == NULL) goto mem_error;
1307
ret = temp;
1308
}
1309
ret[len++] = '#';
1310
p = uri->fragment;
1311
while (*p != 0) {
1312
if (len + 3 >= max) {
1313
temp = xmlSaveUriRealloc(ret, &max);
1314
if (temp == NULL) goto mem_error;
1315
ret = temp;
1316
}
1317
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1318
ret[len++] = *p++;
1319
else {
1320
int val = *(unsigned char *)p++;
1321
int hi = val / 0x10, lo = val % 0x10;
1322
ret[len++] = '%';
1323
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1324
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1325
}
1326
}
1327
}
1328
if (len >= max) {
1329
temp = xmlSaveUriRealloc(ret, &max);
1330
if (temp == NULL) goto mem_error;
1331
ret = temp;
1332
}
1333
ret[len] = 0;
1334
return(ret);
1335
1336
mem_error:
1337
xmlFree(ret);
1338
return(NULL);
1339
}
1340
1341
/**
1342
* xmlPrintURI:
1343
* @stream: a FILE* for the output
1344
* @uri: pointer to an xmlURI
1345
*
1346
* Prints the URI in the stream @stream.
1347
*/
1348
void
1349
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1350
xmlChar *out;
1351
1352
out = xmlSaveUri(uri);
1353
if (out != NULL) {
1354
fprintf(stream, "%s", (char *) out);
1355
xmlFree(out);
1356
}
1357
}
1358
1359
/**
1360
* xmlCleanURI:
1361
* @uri: pointer to an xmlURI
1362
*
1363
* Make sure the xmlURI struct is free of content
1364
*/
1365
static void
1366
xmlCleanURI(xmlURIPtr uri) {
1367
if (uri == NULL) return;
1368
1369
if (uri->scheme != NULL) xmlFree(uri->scheme);
1370
uri->scheme = NULL;
1371
if (uri->server != NULL) xmlFree(uri->server);
1372
uri->server = NULL;
1373
if (uri->user != NULL) xmlFree(uri->user);
1374
uri->user = NULL;
1375
if (uri->path != NULL) xmlFree(uri->path);
1376
uri->path = NULL;
1377
if (uri->fragment != NULL) xmlFree(uri->fragment);
1378
uri->fragment = NULL;
1379
if (uri->opaque != NULL) xmlFree(uri->opaque);
1380
uri->opaque = NULL;
1381
if (uri->authority != NULL) xmlFree(uri->authority);
1382
uri->authority = NULL;
1383
if (uri->query != NULL) xmlFree(uri->query);
1384
uri->query = NULL;
1385
if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1386
uri->query_raw = NULL;
1387
}
1388
1389
/**
1390
* xmlFreeURI:
1391
* @uri: pointer to an xmlURI
1392
*
1393
* Free up the xmlURI struct
1394
*/
1395
void
1396
xmlFreeURI(xmlURIPtr uri) {
1397
if (uri == NULL) return;
1398
1399
if (uri->scheme != NULL) xmlFree(uri->scheme);
1400
if (uri->server != NULL) xmlFree(uri->server);
1401
if (uri->user != NULL) xmlFree(uri->user);
1402
if (uri->path != NULL) xmlFree(uri->path);
1403
if (uri->fragment != NULL) xmlFree(uri->fragment);
1404
if (uri->opaque != NULL) xmlFree(uri->opaque);
1405
if (uri->authority != NULL) xmlFree(uri->authority);
1406
if (uri->query != NULL) xmlFree(uri->query);
1407
if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1408
xmlFree(uri);
1409
}
1410
1411
/************************************************************************
1412
* *
1413
* Helper functions *
1414
* *
1415
************************************************************************/
1416
1417
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of '/' between segments are
 * collapsed to a single '/' along the way; leading slashes are kept.
 *
 * The string is rewritten in place (the result is never longer than the
 * input), no new allocation is done.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /*
     * Leading '/' characters are left alone: start working at the first
     * character of the first non-empty segment.
     */
    for (rd = path; rd[0] == '/'; rd++)
        ;
    if (rd[0] == '\0')
        return(0);

    /* Everything before this point is kept as is. */
    wr = rd;

    /*
     * First pass, cases (c) and (d): "rd" always sits on the first
     * character of a segment at the top of the loop.
     */
    while (rd[0] != '\0') {
        if (rd[0] == '.') {
            /*
             * c) All occurrences of "./", where "." is a complete path
             *    segment, are removed from the buffer string.
             */
            if (rd[1] == '/') {
                rd += 2;
                /* swallow any additional '/' following the "./" */
                while (rd[0] == '/')
                    rd++;
                continue;
            }

            /*
             * d) If the buffer string ends with "." as a complete path
             *    segment, that "." is removed.
             */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is copied through. */
        while (rd[0] != '/') {
            if (rd[0] == '\0')
                goto done_cd;
            *wr++ = *rd++;
        }

        /* Collapse a run of '/' so that only the last one is copied. */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;

        *wr++ = *rd++;
    }
 done_cd:
    *wr = '\0';

    /* Go back to the first segment for the second pass. */
    for (rd = path; rd[0] == '/'; rd++)
        ;
    if (rd[0] == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The buffer is compacted each time a pattern is removed, so a single
     * "current segment" pointer is enough to honour the "iteratively"
     * clause of (e).
     */
    for (;;) {
        char *next, *dst;

        /* Locate the end of the segment starting at "rd". */
        next = rd;
        while ((next[0] != '/') && (next[0] != '\0'))
            ++next;

        /* A last segment has no successor, so nothing can be removed. */
        if (next[0] == '\0')
            break;

        /* "next" now points at the first character of the next segment. */
        ++next;

        /*
         * Keep the current segment when it is ".." itself or when the
         * following segment is not exactly "..".
         */
        if (((rd[0] == '.') && (rd[1] == '.') && (next == rd + 3))
            || ((next[0] != '.') || (next[1] != '.')
                || ((next[2] != '/') && (next[2] != '\0')))) {
            rd = next;
            continue;
        }

        /* "<segment>/.." at the very end: truncate and stop (case f). */
        if (next[2] == '\0') {
            rd[0] = '\0';
            break;
        }

        /*
         * Case (e): slide the remainder of the string over
         * "<segment>/../".  Source and destination overlap, so copy by
         * hand rather than with strcpy.
         */
        dst = rd;
        next += 3;
        while ((*dst++ = *next++) != 0)
            ;

        /* Step back over the '/' preceding the removed segment, if any. */
        next = rd;
        while ((next > path) && ((--next)[0] == '/'))
            ;
        if (next == path)
            continue;

        /*
         * "next" is on the last character of the previous segment; move
         * to its first character and re-examine it, so that something
         * like "foo/bar/../.." is fully reduced: after removing "bar/.."
         * the segment "foo" is now followed by ".." as well.
         */
        rd = next;
        while ((rd > path) && (rd[-1] != '/'))
            --rd;
    }
    *wr = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * For a path starting with '/', leading "/.." groups are discarded.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.')
               && ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (rd[0] != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
1605
1606
/* Returns 1 when @c is one of 0-9, a-f or A-F, 0 otherwise. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1613
1614
/*
 * Numeric value (0..15) of the hexadecimal digit @c; 0 for any other
 * character.
 */
static int
xmlURIHexDigitValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    if ((c >= 'A') && (c <= 'F'))
        return((c - 'A') + 10);
    return(0);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Every
 * "%XX" sequence made of two hexadecimal digits is replaced by the single
 * byte it encodes (no encoding conversion); all other bytes, including a
 * '%' that is not followed by two hexadecimal digits, are copied as is.
 * The result is always 0-terminated and can only be smaller than or the
 * same size as the input, so a caller-supplied @target must have room for
 * the unescaped length plus the terminator.
 *
 * Returns @target when it is not NULL, otherwise a newly allocated copy of
 * the string, unescaped. Returns NULL if @str is NULL, if the full string
 * is too long to be described by an int, or if the allocation fails.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
	return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /*
         * The length is tracked in an int and len + 1 bytes are allocated
         * below: refuse anything that would not fit instead of letting
         * the conversion wrap around.
         */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
	ret = (char *) xmlMallocAtomic(len + 1);
	if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
	    return(NULL);
	}
    } else
	ret = target;

    in = str;
    out = ret;
    while (len > 0) {
        /* An escape needs all three of its bytes inside the range. */
	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = xmlURIHexDigitValue(in[1]) * 16 +
                    xmlURIHexDigitValue(in[2]);

            in += 3;
	    len -= 3;
            /* Explicit sign change */
	    *out++ = (char) c;
	} else {
	    *out++ = *in++;
	    len--;
	}
    }
    *out = 0;
    return(ret);
}
1677
1678
/**
1679
* xmlURIEscapeStr:
1680
* @str: string to escape
1681
* @list: exception list string of chars not to escape
1682
*
1683
* This routine escapes a string to hex, ignoring reserved characters
1684
* (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1685
*
1686
* Returns a new escaped string or NULL in case of error.
1687
*/
1688
xmlChar *
1689
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1690
xmlChar *ret, ch;
1691
xmlChar *temp;
1692
const xmlChar *in;
1693
int len, out;
1694
1695
if (str == NULL)
1696
return(NULL);
1697
if (str[0] == 0)
1698
return(xmlStrdup(str));
1699
len = xmlStrlen(str);
1700
if (!(len > 0)) return(NULL);
1701
1702
len += 20;
1703
ret = (xmlChar *) xmlMallocAtomic(len);
1704
if (ret == NULL) {
1705
xmlURIErrMemory("escaping URI value\n");
1706
return(NULL);
1707
}
1708
in = (const xmlChar *) str;
1709
out = 0;
1710
while(*in != 0) {
1711
if (len - out <= 3) {
1712
temp = xmlSaveUriRealloc(ret, &len);
1713
if (temp == NULL) {
1714
xmlURIErrMemory("escaping URI value\n");
1715
xmlFree(ret);
1716
return(NULL);
1717
}
1718
ret = temp;
1719
}
1720
1721
ch = *in;
1722
1723
if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1724
unsigned char val;
1725
ret[out++] = '%';
1726
val = ch >> 4;
1727
if (val <= 9)
1728
ret[out++] = '0' + val;
1729
else
1730
ret[out++] = 'A' + val - 0xA;
1731
val = ch & 0xF;
1732
if (val <= 9)
1733
ret[out++] = '0' + val;
1734
else
1735
ret[out++] = 'A' + val - 0xA;
1736
in++;
1737
} else {
1738
ret[out++] = *in++;
1739
}
1740
1741
}
1742
ret[out] = 0;
1743
return(ret);
1744
}
1745
1746
/**
1747
* xmlURIEscape:
1748
* @str: the string of the URI to escape
1749
*
1750
* Escaping routine, does not do validity checks !
1751
* It will try to escape the chars needing this, but this is heuristic
1752
* based it's impossible to be sure.
1753
*
1754
* Returns an copy of the string, but escaped
1755
*
1756
* 25 May 2001
1757
* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1758
* according to RFC2396.
1759
* - Carl Douglas
1760
*/
1761
xmlChar *
1762
xmlURIEscape(const xmlChar * str)
1763
{
1764
xmlChar *ret, *segment = NULL;
1765
xmlURIPtr uri;
1766
int ret2;
1767
1768
if (str == NULL)
1769
return (NULL);
1770
1771
uri = xmlCreateURI();
1772
if (uri != NULL) {
1773
/*
1774
* Allow escaping errors in the unescaped form
1775
*/
1776
uri->cleanup = 1;
1777
ret2 = xmlParseURIReference(uri, (const char *)str);
1778
if (ret2) {
1779
xmlFreeURI(uri);
1780
return (NULL);
1781
}
1782
}
1783
1784
if (!uri)
1785
return NULL;
1786
1787
ret = NULL;
1788
1789
#define NULLCHK(p) if(!p) { \
1790
xmlURIErrMemory("escaping URI value\n"); \
1791
xmlFreeURI(uri); \
1792
xmlFree(ret); \
1793
return NULL; } \
1794
1795
if (uri->scheme) {
1796
segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1797
NULLCHK(segment)
1798
ret = xmlStrcat(ret, segment);
1799
ret = xmlStrcat(ret, BAD_CAST ":");
1800
xmlFree(segment);
1801
}
1802
1803
if (uri->authority) {
1804
segment =
1805
xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1806
NULLCHK(segment)
1807
ret = xmlStrcat(ret, BAD_CAST "//");
1808
ret = xmlStrcat(ret, segment);
1809
xmlFree(segment);
1810
}
1811
1812
if (uri->user) {
1813
segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1814
NULLCHK(segment)
1815
ret = xmlStrcat(ret,BAD_CAST "//");
1816
ret = xmlStrcat(ret, segment);
1817
ret = xmlStrcat(ret, BAD_CAST "@");
1818
xmlFree(segment);
1819
}
1820
1821
if (uri->server) {
1822
segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1823
NULLCHK(segment)
1824
if (uri->user == NULL)
1825
ret = xmlStrcat(ret, BAD_CAST "//");
1826
ret = xmlStrcat(ret, segment);
1827
xmlFree(segment);
1828
}
1829
1830
if (uri->port > 0) {
1831
xmlChar port[11];
1832
1833
snprintf((char *) port, 11, "%d", uri->port);
1834
ret = xmlStrcat(ret, BAD_CAST ":");
1835
ret = xmlStrcat(ret, port);
1836
}
1837
1838
if (uri->path) {
1839
segment =
1840
xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1841
NULLCHK(segment)
1842
ret = xmlStrcat(ret, segment);
1843
xmlFree(segment);
1844
}
1845
1846
if (uri->query_raw) {
1847
ret = xmlStrcat(ret, BAD_CAST "?");
1848
ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1849
}
1850
else if (uri->query) {
1851
segment =
1852
xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1853
NULLCHK(segment)
1854
ret = xmlStrcat(ret, BAD_CAST "?");
1855
ret = xmlStrcat(ret, segment);
1856
xmlFree(segment);
1857
}
1858
1859
if (uri->opaque) {
1860
segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1861
NULLCHK(segment)
1862
ret = xmlStrcat(ret, segment);
1863
xmlFree(segment);
1864
}
1865
1866
if (uri->fragment) {
1867
segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1868
NULLCHK(segment)
1869
ret = xmlStrcat(ret, BAD_CAST "#");
1870
ret = xmlStrcat(ret, segment);
1871
xmlFree(segment);
1872
}
1873
1874
xmlFreeURI(uri);
1875
#undef NULLCHK
1876
1877
return (ret);
1878
}
1879
1880
/************************************************************************
1881
* *
1882
* Public functions *
1883
* *
1884
************************************************************************/
1885
1886
/**
1887
* xmlBuildURI:
1888
* @URI: the URI instance found in the document
1889
* @base: the base value
1890
*
1891
* Computes he final URI of the reference done by checking that
1892
* the given URI is valid, and building the final URI using the
1893
* base URI. This is processed according to section 5.2 of the
1894
* RFC 2396
1895
*
1896
* 5.2. Resolving Relative References to Absolute Form
1897
*
1898
* Returns a new URI string (to be freed by the caller) or NULL in case
1899
* of error.
1900
*/
1901
xmlChar *
1902
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1903
xmlChar *val = NULL;
1904
int ret, len, indx, cur, out;
1905
xmlURIPtr ref = NULL;
1906
xmlURIPtr bas = NULL;
1907
xmlURIPtr res = NULL;
1908
1909
/*
1910
* 1) The URI reference is parsed into the potential four components and
1911
* fragment identifier, as described in Section 4.3.
1912
*
1913
* NOTE that a completely empty URI is treated by modern browsers
1914
* as a reference to "." rather than as a synonym for the current
1915
* URI. Should we do that here?
1916
*/
1917
if (URI == NULL)
1918
ret = -1;
1919
else {
1920
if (*URI) {
1921
ref = xmlCreateURI();
1922
if (ref == NULL)
1923
goto done;
1924
ret = xmlParseURIReference(ref, (const char *) URI);
1925
}
1926
else
1927
ret = 0;
1928
}
1929
if (ret != 0)
1930
goto done;
1931
if ((ref != NULL) && (ref->scheme != NULL)) {
1932
/*
1933
* The URI is absolute don't modify.
1934
*/
1935
val = xmlStrdup(URI);
1936
goto done;
1937
}
1938
if (base == NULL)
1939
ret = -1;
1940
else {
1941
bas = xmlCreateURI();
1942
if (bas == NULL)
1943
goto done;
1944
ret = xmlParseURIReference(bas, (const char *) base);
1945
}
1946
if (ret != 0) {
1947
if (ref)
1948
val = xmlSaveUri(ref);
1949
goto done;
1950
}
1951
if (ref == NULL) {
1952
/*
1953
* the base fragment must be ignored
1954
*/
1955
if (bas->fragment != NULL) {
1956
xmlFree(bas->fragment);
1957
bas->fragment = NULL;
1958
}
1959
val = xmlSaveUri(bas);
1960
goto done;
1961
}
1962
1963
/*
1964
* 2) If the path component is empty and the scheme, authority, and
1965
* query components are undefined, then it is a reference to the
1966
* current document and we are done. Otherwise, the reference URI's
1967
* query and fragment components are defined as found (or not found)
1968
* within the URI reference and not inherited from the base URI.
1969
*
1970
* NOTE that in modern browsers, the parsing differs from the above
1971
* in the following aspect: the query component is allowed to be
1972
* defined while still treating this as a reference to the current
1973
* document.
1974
*/
1975
res = xmlCreateURI();
1976
if (res == NULL)
1977
goto done;
1978
if ((ref->scheme == NULL) && (ref->path == NULL) &&
1979
((ref->authority == NULL) && (ref->server == NULL) &&
1980
(ref->port == PORT_EMPTY))) {
1981
if (bas->scheme != NULL)
1982
res->scheme = xmlMemStrdup(bas->scheme);
1983
if (bas->authority != NULL)
1984
res->authority = xmlMemStrdup(bas->authority);
1985
else {
1986
if (bas->server != NULL)
1987
res->server = xmlMemStrdup(bas->server);
1988
if (bas->user != NULL)
1989
res->user = xmlMemStrdup(bas->user);
1990
res->port = bas->port;
1991
}
1992
if (bas->path != NULL)
1993
res->path = xmlMemStrdup(bas->path);
1994
if (ref->query_raw != NULL)
1995
res->query_raw = xmlMemStrdup (ref->query_raw);
1996
else if (ref->query != NULL)
1997
res->query = xmlMemStrdup(ref->query);
1998
else if (bas->query_raw != NULL)
1999
res->query_raw = xmlMemStrdup(bas->query_raw);
2000
else if (bas->query != NULL)
2001
res->query = xmlMemStrdup(bas->query);
2002
if (ref->fragment != NULL)
2003
res->fragment = xmlMemStrdup(ref->fragment);
2004
goto step_7;
2005
}
2006
2007
/*
2008
* 3) If the scheme component is defined, indicating that the reference
2009
* starts with a scheme name, then the reference is interpreted as an
2010
* absolute URI and we are done. Otherwise, the reference URI's
2011
* scheme is inherited from the base URI's scheme component.
2012
*/
2013
if (ref->scheme != NULL) {
2014
val = xmlSaveUri(ref);
2015
goto done;
2016
}
2017
if (bas->scheme != NULL)
2018
res->scheme = xmlMemStrdup(bas->scheme);
2019
2020
if (ref->query_raw != NULL)
2021
res->query_raw = xmlMemStrdup(ref->query_raw);
2022
else if (ref->query != NULL)
2023
res->query = xmlMemStrdup(ref->query);
2024
if (ref->fragment != NULL)
2025
res->fragment = xmlMemStrdup(ref->fragment);
2026
2027
/*
2028
* 4) If the authority component is defined, then the reference is a
2029
* network-path and we skip to step 7. Otherwise, the reference
2030
* URI's authority is inherited from the base URI's authority
2031
* component, which will also be undefined if the URI scheme does not
2032
* use an authority component.
2033
*/
2034
if ((ref->authority != NULL) || (ref->server != NULL) ||
2035
(ref->port != PORT_EMPTY)) {
2036
if (ref->authority != NULL)
2037
res->authority = xmlMemStrdup(ref->authority);
2038
else {
2039
if (ref->server != NULL)
2040
res->server = xmlMemStrdup(ref->server);
2041
if (ref->user != NULL)
2042
res->user = xmlMemStrdup(ref->user);
2043
res->port = ref->port;
2044
}
2045
if (ref->path != NULL)
2046
res->path = xmlMemStrdup(ref->path);
2047
goto step_7;
2048
}
2049
if (bas->authority != NULL)
2050
res->authority = xmlMemStrdup(bas->authority);
2051
else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2052
if (bas->server != NULL)
2053
res->server = xmlMemStrdup(bas->server);
2054
if (bas->user != NULL)
2055
res->user = xmlMemStrdup(bas->user);
2056
res->port = bas->port;
2057
}
2058
2059
/*
2060
* 5) If the path component begins with a slash character ("/"), then
2061
* the reference is an absolute-path and we skip to step 7.
2062
*/
2063
if ((ref->path != NULL) && (ref->path[0] == '/')) {
2064
res->path = xmlMemStrdup(ref->path);
2065
goto step_7;
2066
}
2067
2068
2069
/*
2070
* 6) If this step is reached, then we are resolving a relative-path
2071
* reference. The relative path needs to be merged with the base
2072
* URI's path. Although there are many ways to do this, we will
2073
* describe a simple method using a separate string buffer.
2074
*
2075
* Allocate a buffer large enough for the result string.
2076
*/
2077
len = 2; /* extra / and 0 */
2078
if (ref->path != NULL)
2079
len += strlen(ref->path);
2080
if (bas->path != NULL)
2081
len += strlen(bas->path);
2082
res->path = (char *) xmlMallocAtomic(len);
2083
if (res->path == NULL) {
2084
xmlURIErrMemory("resolving URI against base\n");
2085
goto done;
2086
}
2087
res->path[0] = 0;
2088
2089
/*
2090
* a) All but the last segment of the base URI's path component is
2091
* copied to the buffer. In other words, any characters after the
2092
* last (right-most) slash character, if any, are excluded.
2093
*/
2094
cur = 0;
2095
out = 0;
2096
if (bas->path != NULL) {
2097
while (bas->path[cur] != 0) {
2098
while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2099
cur++;
2100
if (bas->path[cur] == 0)
2101
break;
2102
2103
cur++;
2104
while (out < cur) {
2105
res->path[out] = bas->path[out];
2106
out++;
2107
}
2108
}
2109
}
2110
res->path[out] = 0;
2111
2112
/*
2113
* b) The reference's path component is appended to the buffer
2114
* string.
2115
*/
2116
if (ref->path != NULL && ref->path[0] != 0) {
2117
indx = 0;
2118
/*
2119
* Ensure the path includes a '/'
2120
*/
2121
if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2122
res->path[out++] = '/';
2123
while (ref->path[indx] != 0) {
2124
res->path[out++] = ref->path[indx++];
2125
}
2126
}
2127
res->path[out] = 0;
2128
2129
/*
2130
* Steps c) to h) are really path normalization steps
2131
*/
2132
xmlNormalizeURIPath(res->path);
2133
2134
step_7:
2135
2136
/*
2137
* 7) The resulting URI components, including any inherited from the
2138
* base URI, are recombined to give the absolute form of the URI
2139
* reference.
2140
*/
2141
val = xmlSaveUri(res);
2142
2143
done:
2144
if (ref != NULL)
2145
xmlFreeURI(ref);
2146
if (bas != NULL)
2147
xmlFreeURI(bas);
2148
if (res != NULL)
2149
xmlFreeURI(res);
2150
return(val);
2151
}
2152
2153
/**
2154
* xmlBuildRelativeURI:
2155
* @URI: the URI reference under consideration
2156
* @base: the base value
2157
*
2158
* Expresses the URI of the reference in terms relative to the
2159
* base. Some examples of this operation include:
2160
* base = "http://site1.com/docs/book1.html"
2161
* URI input URI returned
2162
* docs/pic1.gif pic1.gif
2163
* docs/img/pic1.gif img/pic1.gif
2164
* img/pic1.gif ../img/pic1.gif
2165
* http://site1.com/docs/pic1.gif pic1.gif
2166
* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2167
*
2168
* base = "docs/book1.html"
2169
* URI input URI returned
2170
* docs/pic1.gif pic1.gif
2171
* docs/img/pic1.gif img/pic1.gif
2172
* img/pic1.gif ../img/pic1.gif
2173
* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2174
*
2175
*
2176
* Note: if the URI reference is really weird or complicated, it may be
2177
* worthwhile to first convert it into a "nice" one by calling
2178
* xmlBuildURI (using 'base') before calling this routine,
2179
* since this routine (for reasonable efficiency) assumes URI has
2180
* already been through some validation.
2181
*
2182
* Returns a new URI string (to be freed by the caller) or NULL in case
2183
* error.
2184
*/
2185
xmlChar *
2186
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2187
{
2188
xmlChar *val = NULL;
2189
int ret;
2190
int ix;
2191
int nbslash = 0;
2192
int len;
2193
xmlURIPtr ref = NULL;
2194
xmlURIPtr bas = NULL;
2195
xmlChar *bptr, *uptr, *vptr;
2196
int remove_path = 0;
2197
2198
if ((URI == NULL) || (*URI == 0))
2199
return NULL;
2200
2201
/*
2202
* First parse URI into a standard form
2203
*/
2204
ref = xmlCreateURI ();
2205
if (ref == NULL)
2206
return NULL;
2207
/* If URI not already in "relative" form */
2208
if (URI[0] != '.') {
2209
ret = xmlParseURIReference (ref, (const char *) URI);
2210
if (ret != 0)
2211
goto done; /* Error in URI, return NULL */
2212
} else
2213
ref->path = (char *)xmlStrdup(URI);
2214
2215
/*
2216
* Next parse base into the same standard form
2217
*/
2218
if ((base == NULL) || (*base == 0)) {
2219
val = xmlStrdup (URI);
2220
goto done;
2221
}
2222
bas = xmlCreateURI ();
2223
if (bas == NULL)
2224
goto done;
2225
if (base[0] != '.') {
2226
ret = xmlParseURIReference (bas, (const char *) base);
2227
if (ret != 0)
2228
goto done; /* Error in base, return NULL */
2229
} else
2230
bas->path = (char *)xmlStrdup(base);
2231
2232
/*
2233
* If the scheme / server on the URI differs from the base,
2234
* just return the URI
2235
*/
2236
if ((ref->scheme != NULL) &&
2237
((bas->scheme == NULL) ||
2238
(xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2239
(xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2240
(bas->port != ref->port))) {
2241
val = xmlStrdup (URI);
2242
goto done;
2243
}
2244
if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2245
val = xmlStrdup(BAD_CAST "");
2246
goto done;
2247
}
2248
if (bas->path == NULL) {
2249
val = xmlStrdup((xmlChar *)ref->path);
2250
goto done;
2251
}
2252
if (ref->path == NULL) {
2253
ref->path = (char *) "/";
2254
remove_path = 1;
2255
}
2256
2257
/*
2258
* At this point (at last!) we can compare the two paths
2259
*
2260
* First we take care of the special case where either of the
2261
* two path components may be missing (bug 316224)
2262
*/
2263
bptr = (xmlChar *)bas->path;
2264
{
2265
xmlChar *rptr = (xmlChar *) ref->path;
2266
int pos = 0;
2267
2268
/*
2269
* Next we compare the two strings and find where they first differ
2270
*/
2271
if ((*rptr == '.') && (rptr[1] == '/'))
2272
rptr += 2;
2273
if ((*bptr == '.') && (bptr[1] == '/'))
2274
bptr += 2;
2275
else if ((*bptr == '/') && (*rptr != '/'))
2276
bptr++;
2277
while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2278
pos++;
2279
2280
if (bptr[pos] == rptr[pos]) {
2281
val = xmlStrdup(BAD_CAST "");
2282
goto done; /* (I can't imagine why anyone would do this) */
2283
}
2284
2285
/*
2286
* In URI, "back up" to the last '/' encountered. This will be the
2287
* beginning of the "unique" suffix of URI
2288
*/
2289
ix = pos;
2290
for (; ix > 0; ix--) {
2291
if (rptr[ix - 1] == '/')
2292
break;
2293
}
2294
uptr = (xmlChar *)&rptr[ix];
2295
2296
/*
2297
* In base, count the number of '/' from the differing point
2298
*/
2299
for (; bptr[ix] != 0; ix++) {
2300
if (bptr[ix] == '/')
2301
nbslash++;
2302
}
2303
2304
/*
2305
* e.g: URI="foo/" base="foo/bar" -> "./"
2306
*/
2307
if (nbslash == 0 && !uptr[0]) {
2308
val = xmlStrdup(BAD_CAST "./");
2309
goto done;
2310
}
2311
2312
len = xmlStrlen (uptr) + 1;
2313
}
2314
2315
if (nbslash == 0) {
2316
if (uptr != NULL)
2317
/* exception characters from xmlSaveUri */
2318
val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2319
goto done;
2320
}
2321
2322
/*
2323
* Allocate just enough space for the returned string -
2324
* length of the remainder of the URI, plus enough space
2325
* for the "../" groups, plus one for the terminator
2326
*/
2327
val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2328
if (val == NULL) {
2329
xmlURIErrMemory("building relative URI\n");
2330
goto done;
2331
}
2332
vptr = val;
2333
/*
2334
* Put in as many "../" as needed
2335
*/
2336
for (; nbslash>0; nbslash--) {
2337
*vptr++ = '.';
2338
*vptr++ = '.';
2339
*vptr++ = '/';
2340
}
2341
/*
2342
* Finish up with the end of the URI
2343
*/
2344
if (uptr != NULL) {
2345
if ((vptr > val) && (len > 0) &&
2346
(uptr[0] == '/') && (vptr[-1] == '/')) {
2347
memcpy (vptr, uptr + 1, len - 1);
2348
vptr[len - 2] = 0;
2349
} else {
2350
memcpy (vptr, uptr, len);
2351
vptr[len - 1] = 0;
2352
}
2353
} else {
2354
vptr[len - 1] = 0;
2355
}
2356
2357
/* escape the freshly-built path */
2358
vptr = val;
2359
/* exception characters from xmlSaveUri */
2360
val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2361
xmlFree(vptr);
2362
2363
done:
2364
/*
2365
* Free the working variables
2366
*/
2367
if (remove_path != 0)
2368
ref->path = NULL;
2369
if (ref != NULL)
2370
xmlFreeURI (ref);
2371
if (bas != NULL)
2372
xmlFreeURI (bas);
2373
2374
return val;
2375
}
2376
2377
/**
2378
* xmlCanonicPath:
2379
* @path: the resource locator in a filesystem notation
2380
*
2381
* Constructs a canonic path from the specified path.
2382
*
2383
* Returns a new canonic path, or a duplicate of the path parameter if the
2384
* construction fails. The caller is responsible for freeing the memory occupied
2385
* by the returned string. If there is insufficient memory available, or the
2386
* argument is NULL, the function returns NULL.
2387
*/
2388
#define IS_WINDOWS_PATH(p) \
2389
((p != NULL) && \
2390
(((p[0] >= 'a') && (p[0] <= 'z')) || \
2391
((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2392
(p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2393
xmlChar *
2394
xmlCanonicPath(const xmlChar *path)
2395
{
2396
/*
2397
* For Windows implementations, additional work needs to be done to
2398
* replace backslashes in pathnames with "forward slashes"
2399
*/
2400
#if defined(_WIN32)
2401
int len = 0;
2402
char *p = NULL;
2403
#endif
2404
xmlURIPtr uri;
2405
xmlChar *ret;
2406
const xmlChar *absuri;
2407
2408
if (path == NULL)
2409
return(NULL);
2410
2411
#if defined(_WIN32)
2412
/*
2413
* We must not change the backslashes to slashes if the the path
2414
* starts with \\?\
2415
* Those paths can be up to 32k characters long.
2416
* Was added specifically for OpenOffice, those paths can't be converted
2417
* to URIs anyway.
2418
*/
2419
if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2420
(path[3] == '\\') )
2421
return xmlStrdup((const xmlChar *) path);
2422
#endif
2423
2424
/* sanitize filename starting with // so it can be used as URI */
2425
if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2426
path++;
2427
2428
if ((uri = xmlParseURI((const char *) path)) != NULL) {
2429
xmlFreeURI(uri);
2430
return xmlStrdup(path);
2431
}
2432
2433
/* Check if this is an "absolute uri" */
2434
absuri = xmlStrstr(path, BAD_CAST "://");
2435
if (absuri != NULL) {
2436
int l, j;
2437
unsigned char c;
2438
xmlChar *escURI;
2439
2440
/*
2441
* this looks like an URI where some parts have not been
2442
* escaped leading to a parsing problem. Check that the first
2443
* part matches a protocol.
2444
*/
2445
l = absuri - path;
2446
/* Bypass if first part (part before the '://') is > 20 chars */
2447
if ((l <= 0) || (l > 20))
2448
goto path_processing;
2449
/* Bypass if any non-alpha characters are present in first part */
2450
for (j = 0;j < l;j++) {
2451
c = path[j];
2452
if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2453
goto path_processing;
2454
}
2455
2456
/* Escape all except the characters specified in the supplied path */
2457
escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2458
if (escURI != NULL) {
2459
/* Try parsing the escaped path */
2460
uri = xmlParseURI((const char *) escURI);
2461
/* If successful, return the escaped string */
2462
if (uri != NULL) {
2463
xmlFreeURI(uri);
2464
return escURI;
2465
}
2466
xmlFree(escURI);
2467
}
2468
}
2469
2470
path_processing:
2471
/* For Windows implementations, replace backslashes with 'forward slashes' */
2472
#if defined(_WIN32)
2473
/*
2474
* Create a URI structure
2475
*/
2476
uri = xmlCreateURI();
2477
if (uri == NULL) { /* Guard against 'out of memory' */
2478
return(NULL);
2479
}
2480
2481
len = xmlStrlen(path);
2482
if ((len > 2) && IS_WINDOWS_PATH(path)) {
2483
/* make the scheme 'file' */
2484
uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2485
/* allocate space for leading '/' + path + string terminator */
2486
uri->path = xmlMallocAtomic(len + 2);
2487
if (uri->path == NULL) {
2488
xmlFreeURI(uri); /* Guard against 'out of memory' */
2489
return(NULL);
2490
}
2491
/* Put in leading '/' plus path */
2492
uri->path[0] = '/';
2493
p = uri->path + 1;
2494
strncpy(p, (char *) path, len + 1);
2495
} else {
2496
uri->path = (char *) xmlStrdup(path);
2497
if (uri->path == NULL) {
2498
xmlFreeURI(uri);
2499
return(NULL);
2500
}
2501
p = uri->path;
2502
}
2503
/* Now change all occurrences of '\' to '/' */
2504
while (*p != '\0') {
2505
if (*p == '\\')
2506
*p = '/';
2507
p++;
2508
}
2509
2510
if (uri->scheme == NULL) {
2511
ret = xmlStrdup((const xmlChar *) uri->path);
2512
} else {
2513
ret = xmlSaveUri(uri);
2514
}
2515
2516
xmlFreeURI(uri);
2517
#else
2518
ret = xmlStrdup((const xmlChar *) path);
2519
#endif
2520
return(ret);
2521
}
2522
2523
/**
2524
* xmlPathToURI:
2525
* @path: the resource locator in a filesystem notation
2526
*
2527
* Constructs an URI expressing the existing path
2528
*
2529
* Returns a new URI, or a duplicate of the path parameter if the
2530
* construction fails. The caller is responsible for freeing the memory
2531
* occupied by the returned string. If there is insufficient memory available,
2532
* or the argument is NULL, the function returns NULL.
2533
*/
2534
xmlChar *
2535
xmlPathToURI(const xmlChar *path)
2536
{
2537
xmlURIPtr uri;
2538
xmlURI temp;
2539
xmlChar *ret, *cal;
2540
2541
if (path == NULL)
2542
return(NULL);
2543
2544
if ((uri = xmlParseURI((const char *) path)) != NULL) {
2545
xmlFreeURI(uri);
2546
return xmlStrdup(path);
2547
}
2548
cal = xmlCanonicPath(path);
2549
if (cal == NULL)
2550
return(NULL);
2551
#if defined(_WIN32)
2552
/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2553
If 'cal' is a valid URI already then we are done here, as continuing would make
2554
it invalid. */
2555
if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2556
xmlFreeURI(uri);
2557
return cal;
2558
}
2559
/* 'cal' can contain a relative path with backslashes. If that is processed
2560
by xmlSaveURI, they will be escaped and the external entity loader machinery
2561
will fail. So convert them to slashes. Misuse 'ret' for walking. */
2562
ret = cal;
2563
while (*ret != '\0') {
2564
if (*ret == '\\')
2565
*ret = '/';
2566
ret++;
2567
}
2568
#endif
2569
memset(&temp, 0, sizeof(temp));
2570
temp.path = (char *) cal;
2571
ret = xmlSaveUri(&temp);
2572
xmlFree(cal);
2573
return(ret);
2574
}
2575
2576