Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/pkg
Path: blob/main/external/fetch/fetch.c
5238 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1998-2004 Dag-Erling Smørgrav
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer
12
* in this position and unchanged.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
* 3. The name of the author may not be used to endorse or promote products
17
* derived from this software without specific prior written permission
18
*
19
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
#include "bsd_compat.h"
32
33
#include <sys/param.h>
34
35
#include <netinet/in.h>
36
37
#include <errno.h>
38
#include <ctype.h>
39
#include <stdio.h>
40
#include <stdlib.h>
41
#include <string.h>
42
43
#include "fetch.h"
44
#include "common.h"
45
46
auth_t fetchAuthMethod;
47
int fetchLastErrCode;
48
char fetchLastErrString[MAXERRSTRING];
49
int fetchTimeout;
50
int fetchRestartCalls = 1;
51
int fetchDebug;
52
const char *fetchCustomHTTPHeaders;
53
int fetchSpeedLimit;
54
int fetchSpeedTime;
55
56
57
/*** Local data **************************************************************/
58
59
/*
60
* Error messages for parser errors
61
*/
62
#define URL_MALFORMED 1
63
#define URL_BAD_SCHEME 2
64
#define URL_BAD_PORT 3
65
static struct fetcherr url_errlist[] = {
66
{ URL_MALFORMED, FETCH_URL, "Malformed URL" },
67
{ URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
68
{ URL_BAD_PORT, FETCH_URL, "Invalid server port" },
69
{ -1, FETCH_UNKNOWN, "Unknown parser error" }
70
};
71
72
73
/*** Public API **************************************************************/
74
75
/*
76
* Select the appropriate protocol for the URL scheme, and return a
77
* read-only stream connected to the document referenced by the URL.
78
* Also fill out the struct url_stat.
79
*/
80
FILE *
81
fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
82
{
83
84
if (us != NULL) {
85
us->size = -1;
86
us->atime = us->mtime = 0;
87
}
88
if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
89
return (fetchXGetHTTP(URL, us, flags));
90
else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
91
return (fetchXGetHTTP(URL, us, flags));
92
url_seterr(URL_BAD_SCHEME);
93
return (NULL);
94
}
95
96
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * struct url_stat filled in: passes NULL for it and returns the stream
 * (or NULL) that fetchXGet() produced.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
105
106
/*
107
* Select the appropriate protocol for the URL scheme, and return a
108
* write-only stream connected to the document referenced by the URL.
109
*/
110
FILE *
111
fetchPut(struct url *URL, const char *flags)
112
{
113
114
if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
115
return (fetchPutHTTP(URL, flags));
116
else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
117
return (fetchPutHTTP(URL, flags));
118
url_seterr(URL_BAD_SCHEME);
119
return (NULL);
120
}
121
122
/*
123
* Select the appropriate protocol for the URL scheme, and return the
124
* size of the document referenced by the URL if it exists.
125
*/
126
int
127
fetchStat(struct url *URL, struct url_stat *us, const char *flags)
128
{
129
130
if (us != NULL) {
131
us->size = -1;
132
us->atime = us->mtime = 0;
133
}
134
if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
135
return (fetchStatHTTP(URL, us, flags));
136
else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
137
return (fetchStatHTTP(URL, us, flags));
138
url_seterr(URL_BAD_SCHEME);
139
return (-1);
140
}
141
142
/*
143
* Select the appropriate protocol for the URL scheme, and return a
144
* list of files in the directory pointed to by the URL.
145
*/
146
struct url_ent *
147
fetchList(struct url *URL, const char *flags)
148
{
149
150
if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
151
return (fetchListHTTP(URL, flags));
152
else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
153
return (fetchListHTTP(URL, flags));
154
url_seterr(URL_BAD_SCHEME);
155
return (NULL);
156
}
157
158
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(), open
 * it, then release the parsed URL.  Returns NULL if parsing fails,
 * otherwise whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
175
176
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * struct url_stat filled in: passes NULL for it and returns the result.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
184
185
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(), open
 * it for writing, then release the parsed URL.  Returns NULL if parsing
 * fails, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
202
203
/*
204
* Attempt to parse the given URL; if successful, call fetchStat().
205
*/
206
int
207
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
208
{
209
struct url *u;
210
int s;
211
212
if ((u = fetchParseURL(URL)) == NULL)
213
return (-1);
214
215
s = fetchStat(u, us, flags);
216
217
fetchFreeURL(u);
218
return (s);
219
}
220
221
/*
222
* Attempt to parse the given URL; if successful, call fetchList().
223
*/
224
struct url_ent *
225
fetchListURL(const char *URL, const char *flags)
226
{
227
struct url *u;
228
struct url_ent *ue;
229
230
if ((u = fetchParseURL(URL)) == NULL)
231
return (NULL);
232
233
ue = fetchList(u, flags);
234
235
fetchFreeURL(u);
236
return (ue);
237
}
238
239
/*
240
* Make a URL
241
*/
242
struct url *
243
fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
244
const char *user, const char *pwd)
245
{
246
struct url *u;
247
248
if (!scheme || (!host && !doc)) {
249
url_seterr(URL_MALFORMED);
250
return (NULL);
251
}
252
253
if (port < 0 || port > 65535) {
254
url_seterr(URL_BAD_PORT);
255
return (NULL);
256
}
257
258
/* allocate struct url */
259
if ((u = calloc(1, sizeof(*u))) == NULL) {
260
fetch_syserr();
261
return (NULL);
262
}
263
u->netrcfd = -1;
264
265
if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
266
fetch_syserr();
267
free(u);
268
return (NULL);
269
}
270
271
#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
272
seturl(scheme);
273
seturl(host);
274
seturl(user);
275
seturl(pwd);
276
#undef seturl
277
u->port = port;
278
279
return (u);
280
}
281
282
/*
 * Convert one hexadecimal digit (either case) to its numeric value
 * 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');

	return (value);
}
297
298
/*
 * Percent-decode a URL component.
 *
 * Bytes are copied from src to dst until the end of the string or an
 * unescaped '@' or ':' is reached; "%XX" sequences are replaced by the
 * byte they encode.  At most dlen bytes are written and no terminator
 * is appended (the caller must provide one).
 *
 * Returns a pointer to the first unconsumed input character (the NUL,
 * '@' or ':'), or NULL if an escape is not two hex digits, encodes a
 * NUL byte ("%00"), or the output would exceed dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *cur = src;
	size_t room = dlen;

	while (*cur != '\0' && *cur != '@' && *cur != ':') {
		char byte = *cur;

		if (byte == '%') {
			int hi, lo;

			/*
			 * Look at the second digit only once the first is
			 * known to be valid, so we never read past the NUL.
			 */
			hi = fetch_hexval(cur[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(cur[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			byte = (char)(hi << 4 | lo);
			cur += 2;
		}

		if (room == 0)
			return (NULL);
		room--;
		*dst++ = byte;
		cur++;
	}

	return (cur);
}
329
330
/*
 * Apply RFC 3986 section 5.2.4 "Remove Dot Segments" in place.
 *
 * Collapses "." and ".." path segments so servers that refuse
 * non-normalized request targets (e.g. paths containing "/./")
 * do not reject otherwise valid requests.
 *
 * Only the path component is rewritten: everything from the first '?'
 * onwards (the query) is preserved byte for byte, since "/" and ".."
 * are ordinary data there and must not be collapsed.
 *
 * The result is never longer than the input, so no reallocation is
 * needed; path must point to a writable, NUL-terminated buffer.
 */
static void
fetch_remove_dot_segments(char *path)
{
	char *in = path, *out = path;
	char *query;

	/*
	 * Temporarily terminate the string at the query delimiter so the
	 * loop below only ever sees the path component.
	 */
	if ((query = strchr(path, '?')) != NULL)
		*query = '\0';

	while (*in != '\0') {
		/*
		 * A. Drop:
		 * - leading "../"
		 * - leading "./"
		 */
		if (strncmp(in, "../", 3) == 0) {
			in += 3;
			continue;
		}
		if (strncmp(in, "./", 2) == 0) {
			in += 2;
			continue;
		}
		/*
		 * B. Replace:
		 * - leading "/./" -> "/"
		 * - ending "/." -> "/"
		 */
		if (strncmp(in, "/./", 3) == 0) {
			in += 2;
			continue;
		}
		if (in[0] == '/' && in[1] == '.' && in[2] == '\0') {
			*out++ = '/';
			break;
		}
		/*
		 * C.
		 * - replace leading "/../" -> "/" and remove the last segment
		 * - drop the ending "/.." and remove the last segment
		 */
		if (strncmp(in, "/../", 4) == 0) {
			in += 3;
			while (out > path && *--out != '/')
				/* remove segment */;
			continue;
		}
		if (in[0] == '/' && in[1] == '.' && in[2] == '.' &&
		    in[3] == '\0') {
			in += 3;
			while (out > path && *--out != '/')
				/* remove segment */;
			*out++ = '/';
			break;
		}
		/*
		 * D. Drop:
		 * - bare "."
		 * - bare ".."
		 */
		if ((in[0] == '.' && in[1] == '\0') ||
		    (in[0] == '.' && in[1] == '.' && in[2] == '\0'))
			break;
		/*
		 * E. Now move to the next segment, copying it to the output.
		 */
		if (*in == '/')
			*out++ = *in++;
		while (*in != '\0' && *in != '/')
			*out++ = *in++;
	}

	if (query != NULL) {
		/*
		 * Restore the delimiter and slide the untouched query down
		 * behind the normalized path.  out never passes query, and
		 * the regions may overlap, hence memmove().
		 */
		*query = '?';
		memmove(out, query, strlen(query) + 1);
	} else {
		*out = '\0';
	}
}
405
406
/*
407
* Split an URL into components. URL syntax is:
408
* [method:/][/[user[:pwd]@]host[:port]/][document]
409
* This almost, but not quite, RFC1738 URL syntax.
410
*/
411
struct url *
412
fetchParseURL(const char *URL)
413
{
414
char *doc;
415
const char *p, *q;
416
struct url *u;
417
int i, n;
418
419
/* allocate struct url */
420
if ((u = calloc(1, sizeof(*u))) == NULL) {
421
fetch_syserr();
422
return (NULL);
423
}
424
u->netrcfd = -1;
425
426
/* scheme name */
427
if ((p = strstr(URL, ":/"))) {
428
if (p - URL > URL_SCHEMELEN)
429
goto ouch;
430
for (i = 0; URL + i < p; i++)
431
u->scheme[i] = tolower((unsigned char)URL[i]);
432
URL = ++p;
433
/*
434
* Only one slash: no host, leave slash as part of document
435
* Two slashes: host follows, strip slashes
436
*/
437
if (URL[1] == '/')
438
URL = (p += 2);
439
} else {
440
p = URL;
441
}
442
if (!*URL || *URL == '/' || *URL == '.' ||
443
(u->scheme[0] == '\0' &&
444
strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
445
goto nohost;
446
447
p = strpbrk(URL, "/@");
448
if (p && *p == '@') {
449
/* username */
450
q = fetch_pctdecode(u->user, URL, URL_USERLEN);
451
if (q == NULL)
452
goto ouch;
453
454
/* password */
455
if (*q == ':') {
456
q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
457
if (q == NULL)
458
goto ouch;
459
}
460
p++;
461
} else {
462
p = URL;
463
}
464
465
/* hostname */
466
if (*p == '[') {
467
q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef.");
468
if (*q++ != ']')
469
goto ouch;
470
} else {
471
/* valid characters in a DNS name */
472
q = p + strspn(p, "-." "0123456789"
473
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
474
"abcdefghijklmnopqrstuvwxyz");
475
}
476
if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
477
goto ouch;
478
for (i = 0; p + i < q; i++)
479
u->host[i] = tolower((unsigned char)p[i]);
480
u->host[i] = '\0';
481
p = q;
482
483
/* port */
484
if (*p == ':') {
485
for (n = 0, q = ++p; *q && (*q != '/'); q++) {
486
if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
487
n = n * 10 + (*q - '0');
488
} else {
489
/* invalid port */
490
url_seterr(URL_BAD_PORT);
491
goto ouch;
492
}
493
}
494
#ifndef IPPORT_MAX
495
#define IPPORT_MAX 65535
496
#endif
497
if (p != q && (n < 1 || n > IPPORT_MAX))
498
goto ouch;
499
u->port = n;
500
p = q;
501
}
502
503
nohost:
504
/* document */
505
if (!*p)
506
p = "/";
507
508
if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
509
strcmp(u->scheme, SCHEME_HTTPS) == 0) {
510
const char hexnums[] = "0123456789abcdef";
511
512
/* percent-escape whitespace. */
513
if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
514
fetch_syserr();
515
goto ouch;
516
}
517
u->doc = doc;
518
/* fragments are reserved for client-side processing, see
519
* https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1
520
*/
521
while (*p != '\0' && *p != '#') {
522
if (!isspace((unsigned char)*p)) {
523
*doc++ = *p++;
524
} else {
525
*doc++ = '%';
526
*doc++ = hexnums[((unsigned int)*p) >> 4];
527
*doc++ = hexnums[((unsigned int)*p) & 0xf];
528
p++;
529
}
530
}
531
*doc = '\0';
532
fetch_remove_dot_segments(u->doc);
533
} else if ((u->doc = strdup(p)) == NULL) {
534
fetch_syserr();
535
goto ouch;
536
}
537
538
DEBUGF("scheme: \"%s\"\n"
539
"user: \"%s\"\n"
540
"password: \"%s\"\n"
541
"host: \"%s\"\n"
542
"port: \"%d\"\n"
543
"document: \"%s\"\n",
544
u->scheme, u->user, u->pwd,
545
u->host, u->port, u->doc);
546
547
return (u);
548
549
ouch:
550
free(u);
551
return (NULL);
552
}
553
554
/*
555
* Free a URL
556
*/
557
void
558
fetchFreeURL(struct url *u)
559
{
560
free(u->doc);
561
free(u);
562
}
563
564