Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/html/htmlrefs.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1996-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <gsf@research.att.com> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
/*
22
* Glenn Fowler
23
* AT&T Research
24
*/
25
26
static const char usage[] =
27
"[-?\n@(#)$Id: htmlrefs (AT&T Research) 2012-01-01 $\n]"
28
USAGE_LICENSE
29
"[+NAME?htmlrefs - list html url references]"
30
"[+DESCRIPTION?\bhtmlrefs\b lists url references from the"
31
" local closure of the input \bhtml\b \afile\as. If \afile\a is not"
32
" specified then the top level default user file is read. The \bhtml\b"
33
" parse is rudimentary; don't use \bhtmlrefs\b to detect valid \bhtml\b"
34
" files.]"
35
"[+?The top level references are determined in this order (the \b--index\b,"
36
" \b--root\b and \b--user\b options influence the order):]{"
37
" [+$HOME/index.html?Pseudo index containing"
38
" \b<LINK href=\b\adir\a \brel=\b\atype\a\b>\b references to"
39
" top level directories. \atype\a may be one of:]{"
40
" [+document-root?The document root directory containing URL"
41
" target documents. Exactly one \bdocument-root\b must"
42
" be specified.]"
43
" [+program-root?The program root directory containing CGI"
44
" support programs and scripts. This type is optional."
45
" If specified then the program root directory should"
46
" contain a pseudo index for its references.]"
47
" [+data-root?The data root directory containing CGI"
48
" support data. This type is optional. If specified then"
49
" the data root directory should contain a pseudo index"
50
" for its references.]"
51
" [+dynamic?All files under \adir\a are considered referenced.]"
52
" [+host?Provides a default value for the \b--hosts\b option.]"
53
" [+ignore?\adir\a is a \bksh\b(1) pattern of paths to ignore.]"
54
" [+internal?If \b--external\b is on then \adir\a is a \bksh\b(1)"
55
" pattern of internal paths.]"
56
" [+secure?Files under this dir are accessed by \bhttps:\b only.]"
57
" }"
58
" [+$HOME/wwwfiles/index.html?]"
59
" [+$HOME/public_html/index.html?]"
60
"}"
61
"[a:all?List all references whether they exist or not.]"
62
"[c:copy?Copy the selected references to \adirectory\a which must already"
63
" exist. If \b--external\b is also specified then lines between"
64
" \b<!--INTERNAL-->\b ... \b<!--/INTERNAL-->\b lines are not"
65
" copied. If \b--unreferenced\b is also specified then files and"
66
" directories in \adirectory\a that have not been copied are"
67
" removed. Target file modification times are set to match source"
68
" times so that future copies can be avoided.]:[directory]"
69
"[d:dependents?List each selected local file followed by \b:\b and a list of"
70
" all local files referring to the file.]"
71
"[e:external?Do not list references inside \b<!--INTERNAL-->\b ..."
72
" \b<!--/INTERNAL-->\b lines. See \bmm2html\b(1) for an html"
73
" generator that inserts these lines.]"
74
"[F:force?By default files are not copied if the source and target size and"
75
" modification times match. \b--force\b forces all files to be copied.]"
76
"[h:hosts?Check only references matching the \bksh\b(1) pattern"
77
" \bhttp://\b\apattern\a\b/\b.]:[pattern]"
78
"[i:index?\aname\a specifies the page named by directory"
79
" references.]:[name:=index.html]"
80
"[k:keep?\apattern\a is used to match file base names that are always"
81
" considered referenced.]:[pattern:=.htaccess]"
82
"[l:limit?Limit \b--copy\b and \b--remove\b operations to path names matching"
83
" \apattern\a.]:[pattern]"
84
"[m:missing?List missing local file references.]"
85
"[n!:exec?Enable file modification operations. \b--noexec\b lists the"
86
" operations but does not do them.]"
87
"[p!:perlwarn?Check HTML files for unintentional embedded \bperl\b(1)"
88
" constructs: a left bracket followed by one of \b-+!$*#\b. Manually"
89
" translating left bracket to \b&#0091;\b avoids unwanted \bperl\b"
90
" interactions (why didn't they use tags like everyone else?)"
91
" \bmm2html\b(1) and \boptget\b(3) do the translation by default.]"
92
"[X:remove?Unreferenced files are removed when \b--unreferenced\b and"
93
" \b--nocopy\b are specified.]"
94
"[r:root?The local \adirectory\a for \b--user\b"
95
" references.]:[directory:=~\auser\a]"
96
"[K:skip?\apattern\a is used to match file base names that are never"
97
" considered referenced.]:[pattern:=00-INDEX-00]"
98
"[s:strict?By default unreferenced \b--index\b files and the containing"
99
" directory are considered referenced; \b--strict\b considers"
100
" unreferenced \b--index\b files unreferenced.]"
101
"[S:symlink?Instruct \b--copy\b to \bsymlink\b(2) files that do not contain"
102
" \b<!--INTERNAL-->\b ... \b<!--/INTERNAL-->\b or are not in"
103
" \b/cgi-bin/\b.]"
104
"[u:user?\b~\b\aname\a translates to the \b--root\b"
105
" directory.]:[name:=caller-uid]"
106
"[v:verbose?List files as they are copied (see \b--copy\b.)]"
107
"[w:warn?Produce a warning diagnostic for missing files.]"
108
"[x:unreferenced?If \b--copy\b is also specified then remove files and"
109
" directories in the \b--copy\b \adirectory\a that have not been copied."
110
" Otherwise list unreferenced files in the \b--root\b directory."
111
" A directory that contains no referenced files but does contain an"
112
" \b--index\b file is considered referenced (along with the \b--index\b"
113
" file) unless \b--strict\b is enabled.]"
114
115
"\n"
116
"\n[ file ... ]\n"
117
"\n"
118
119
"[+EXAMPLES]{"
120
" [+htmlrefs --hosts=www.research.att.com --missing?List missing"
121
" references to the local host \bwww.research.att.com\b.]"
122
" [+htmlrefs -n -h www.research.att.com -c ~/external/wwwfiles -e -x?Copy"
123
" the local hierarchy to \b~/external/wwwfiles\b for external"
124
" release, and remove unreferenced files in the copy.]"
125
"}"
126
"[+SEE ALSO?\bhtml2rtf\b(1), \bmm2html\b(1)]"
127
;
128
129
#include <ast.h>
130
#include <cdt.h>
131
#include <ctype.h>
132
#include <error.h>
133
#include <fts.h>
134
#include <glob.h>
135
#include <pwd.h>
136
#include <tm.h>
137
138
/* default values for the --index, --keep and --skip options */

#define INDEX		"index.html"
#define KEEP		".htaccess"
#define SKIP		"00-INDEX-00"

/* bits or'd into File_t.flags and passed as the flags argument of add() */

#define CHECKED		(1<<0)
#define COPIED		(1<<1)
#define COPY		(1<<2)
#define DIRECTORY	(1<<3)
#define EXTERNAL	(1<<4)
#define FILTER		(1<<5)
#define INTERNAL	(1<<6)
#define MISSING		(1<<7)
#define SCANNED		(1<<8)
#define SECURE		(1<<9)
#define VERBOSE		(1<<10)

/* terminal states of the attribute name matcher in refs() */

#define HIT		(-1)
#define MISS		(-2)
156
157
/*
 * append c at cursor s of array buf and advance s
 * the last byte of buf is never written so that the caller can always
 * terminate the collected text with *s = 0
 * buf must be an array (sizeof is applied to it)
 * evaluates to c, or -1 if buf is full and c was dropped
 */

#define STUFF(s, buf, c)	(((s) < &(buf)[sizeof(buf) - 1]) ? (*(s)++ = (c)) : -1)
158
159
struct List_s;
160
161
typedef struct String_s
162
{
163
char* data;
164
unsigned int size;
165
} String_t;
166
167
typedef struct File_s
168
{
169
Dtlink_t link;
170
unsigned long time;
171
unsigned int flags;
172
struct List_s* refs;
173
char name[1];
174
} File_t;
175
176
typedef struct List_s
177
{
178
struct List_s* next;
179
File_t* file;
180
} List_t;
181
182
typedef struct State_s
183
{
184
Dtdisc_t disc;
185
Dt_t* files;
186
187
int all;
188
int dependents;
189
int exec;
190
int external;
191
int force;
192
int missing;
193
int more;
194
int perlwarn;
195
int remove;
196
int strict;
197
int symlink;
198
int unreferenced;
199
int verbose;
200
int warn;
201
202
String_t copy;
203
String_t dataroot;
204
String_t documentroot;
205
String_t hosts;
206
String_t ignore;
207
String_t index;
208
String_t internal;
209
String_t keep;
210
String_t limit;
211
String_t programroot;
212
String_t root;
213
String_t skip;
214
String_t user;
215
216
char buf[PATH_MAX];
217
char dir[PATH_MAX];
218
char tmp[PATH_MAX];
219
} State_t;
220
221
/*
 * comment markers that delimit internal-only html text
 * internal opens the region and external closes it
 * sizeof() is applied to both so they must remain arrays
 */

static const char	internal[] = "<!--INTERNAL-->",
			external[] = "<!--/INTERNAL-->";
223
224
static int
225
keep(State_t* state, const char* name, int mode)
226
{
227
char* s;
228
229
if (state->skip.size)
230
{
231
if (s = strrchr(name, '/'))
232
s++;
233
else
234
s = (char*)name;
235
if (strmatch(s, state->skip.data))
236
return 0;
237
}
238
if (mode >= 0 && access(name, mode))
239
return 0;
240
return 1;
241
}
242
243
/*
244
* check for glob(dir/name)
245
*/
246
247
static void
248
check(register State_t* state, const char* dir, const char* name, unsigned int flags)
249
{
250
register File_t* dp;
251
register char* s;
252
register char** p;
253
glob_t gl;
254
255
memset(&gl, 0, sizeof(gl));
256
sfsprintf(state->dir, sizeof(state->dir) - 1, "%s/(%s)", dir, name);
257
if (!glob(state->dir, GLOB_AUGMENTED|GLOB_DISC|GLOB_STACK, 0, &gl))
258
for (p = gl.gl_pathv; s = *p++;)
259
if (!dtmatch(state->files, s) && keep(state, s, F_OK))
260
{
261
if (!(dp = newof(0, File_t, 1, strlen(s))))
262
error(ERROR_SYSTEM|3, "out of space [file]");
263
strcpy(dp->name, s);
264
dtinsert(state->files, dp);
265
dp->flags |= flags;
266
}
267
}
268
269
/*
270
* add reference path s
271
*/
272
273
static File_t*
274
add(register State_t* state, register char* s, unsigned int flags, const char* path, int prefix, File_t* ref)
275
{
276
register char* t;
277
register File_t* fp;
278
register File_t* dp;
279
register List_t* lp;
280
char* u;
281
struct stat st;
282
283
if (!(flags & COPIED))
284
{
285
if (ref && (ref->flags & SECURE))
286
flags |= SECURE;
287
if (state->hosts.size)
288
{
289
if (t = strchr(s, ':'))
290
{
291
if (strneq(s, "http://", t - s + 3))
292
{
293
s = t + 3;
294
flags &= ~SECURE;
295
}
296
else if (strneq(s, "https://", t - s + 4))
297
{
298
s = t + 4;
299
flags |= SECURE;
300
}
301
else
302
return 0;
303
if (t = strchr(s, '/'))
304
*t = 0;
305
if (!strmatch(s, state->hosts.data))
306
return 0;
307
if (t)
308
*(s = t) = '/';
309
else
310
s = "/";
311
}
312
if (*s == '/')
313
{
314
if (ref && !streq(s, ref->name))
315
{
316
if (*(s + 1) != '~')
317
return 0;
318
if (*(s + 2) == '/')
319
s += 2;
320
else if (!state->user.size || !strneq(s + 2, state->user.data, state->user.size) || *(s + 2 + state->user.size) != '/')
321
return 0;
322
else
323
s += 2 + state->user.size;
324
if (state->documentroot.size)
325
{
326
sfsprintf(state->buf, sizeof(state->buf) - 1, "%s%s%s", state->documentroot.data, (flags & SECURE) ? "/secure" : "", s);
327
pathcanon(s = state->buf, sizeof(state->buf), 0);
328
}
329
else if (state->root.size)
330
{
331
sfsprintf(state->buf, sizeof(state->buf) - 1, "%s%s", state->root.data, (flags & SECURE) ? "/secure" : "", s);
332
pathcanon(s = state->buf, sizeof(state->buf), 0);
333
}
334
}
335
}
336
else if (prefix)
337
{
338
sfsprintf(state->buf, sizeof(state->buf) - 1, "%-.*s%s", prefix, path, s);
339
pathcanon(s = state->buf, sizeof(state->buf), 0);
340
}
341
else if (flags & SECURE)
342
{
343
sfsprintf(state->tmp, sizeof(state->tmp), "secure/%s", s);
344
s = state->tmp;
345
}
346
}
347
if (*s == '.' && *(s + 1) == '/')
348
while (*++s == '/');
349
if (!*s)
350
s = "/";
351
for (t = s + strlen(s); t > s && *(t - 1) == '/'; t--);
352
if (*t == '/' || !stat(s, &st) && S_ISDIR(st.st_mode))
353
{
354
if (s >= state->buf && s < state->buf + sizeof(state->buf))
355
{
356
if (!*t)
357
*t = '/';
358
sfsprintf(t + 1, sizeof(state->buf) - (t - s + 2), "%s", state->index.data);
359
}
360
else
361
{
362
sfsprintf(state->buf, sizeof(state->buf) - 1, "%-.*s/%s", t - s, s, state->index.data);
363
s = state->buf;
364
}
365
}
366
}
367
if (!(fp = (File_t*)dtmatch(state->files, s)))
368
{
369
if (!keep(state, s, -1))
370
return 0;
371
if (!(fp = newof(0, File_t, 1, strlen(s))))
372
error(ERROR_SYSTEM|3, "out of space [file]");
373
strcpy(fp->name, s);
374
dtinsert(state->files, fp);
375
state->more = 1;
376
if (t = strrchr(s, '/'))
377
do
378
{
379
*t = 0;
380
if (dp = (File_t*)dtmatch(state->files, s))
381
{
382
*t = '/';
383
break;
384
}
385
if (!(dp = newof(0, File_t, 1, strlen(s))))
386
error(ERROR_SYSTEM|3, "out of space [file]");
387
strcpy(dp->name, s);
388
dtinsert(state->files, dp);
389
dp->flags |= DIRECTORY|flags;
390
if (!(flags & COPIED))
391
{
392
if (!state->strict)
393
check(state, s, state->index.data, flags);
394
if (state->keep.size)
395
check(state, s, state->keep.data, flags);
396
}
397
u = strrchr(s, '/');
398
*t = '/';
399
} while ((t = u) && (t - s) > state->root.size);
400
}
401
fp->flags |= flags;
402
if (ref && state->dependents)
403
{
404
for (lp = fp->refs; lp && lp->file != ref; lp = lp->next);
405
if (!lp)
406
{
407
if (!(lp = newof(0, List_t, 1, 0)))
408
error(ERROR_SYSTEM|3, "out of space [file]");
409
lp->file = ref;
410
lp->next = fp->refs;
411
fp->refs = lp;
412
}
413
}
414
return fp;
415
}
416
417
/*
 * fts comparison function: order entries by strcmp() of fts_name
 */

static int
order(FTSENT* const* a, FTSENT* const* b)
{
	const FTSENT*	lhs = *a;
	const FTSENT*	rhs = *b;

	return strcmp(lhs->fts_name, rhs->fts_name);
}
426
427
/*
428
* parse and set root dir r from s
429
* possibly using tmp buffer buf
430
*/
431
432
static void
433
rootdir(State_t* state, register String_t* r, register char* s, char* buf, size_t z)
434
{
435
register char* t;
436
register int n;
437
438
if (t = strrchr(s, '/'))
439
*t = 0;
440
if (*s == '/')
441
n = strlen(s);
442
else
443
{
444
n = sfsprintf(buf, z, "%s/%s", state->root.data, s);
445
s = buf;
446
}
447
if (!(r->data = strdup(s)))
448
error(ERROR_SYSTEM|3, "out of space [rootdir]");
449
r->size = n;
450
if (t)
451
*t = '/';
452
}
453
454
/*
455
* return next directory entry
456
*/
457
458
static FTSENT*
459
scan(State_t* state, FTS* fts)
460
{
461
FTSENT* ent;
462
Sfio_t* sp;
463
char* s;
464
int skip;
465
466
while (ent = fts_read(fts))
467
{
468
if (state->external && ent->fts_info == FTS_D)
469
{
470
sfsprintf(state->buf, sizeof(state->buf) - 1, "%s/%s", ent->fts_path, state->index.data);
471
if (sp = sfopen(NiL, state->buf, "r"))
472
{
473
skip = 0;
474
while (s = sfgetr(sp, '\n', 1))
475
{
476
if (strgrpmatch(s, internal, NiL, 0, 0))
477
{
478
skip = 1;
479
break;
480
}
481
else if (strgrpmatch(s, "</HEAD>", NiL, 0, STR_ICASE))
482
break;
483
}
484
sfclose(sp);
485
if (skip)
486
{
487
if (fts_set(NiL, ent, FTS_SKIP))
488
error(1, "%s: cannot skip", ent->fts_path);
489
continue;
490
}
491
}
492
}
493
break;
494
}
495
return ent;
496
}
497
498
/*
499
* process refs in path
500
*/
501
502
static void
503
refs(register State_t* state, const char* path, register Sfio_t* ip, File_t* ref)
504
{
505
register int c;
506
register int q;
507
register int r;
508
register int a;
509
register char* s;
510
char* p;
511
char* t;
512
File_t* f;
513
String_t* v;
514
int m;
515
int perlwarn;
516
int prefix;
517
unsigned int secure;
518
unsigned int flags;
519
520
char buf[8 * 1024];
521
522
perlwarn = state->perlwarn && strmatch(path, "*.(html|htm|HTML|HTM)");
523
prefix = (s = strrchr(path, '/')) ? s - (char*)path + 1 : 0;
524
flags = EXTERNAL;
525
for (;;)
526
{
527
switch (c = sfgetc(ip))
528
{
529
case EOF:
530
break;
531
case '<':
532
q = 0;
533
s = buf;
534
for (;;)
535
{
536
switch (c = sfgetc(ip))
537
{
538
case EOF:
539
return;
540
case '>':
541
sfungetc(ip, c);
542
break;
543
default:
544
if (isspace(c))
545
break;
546
STUFF(s, buf, c);
547
continue;
548
}
549
break;
550
}
551
q = 0;
552
if (flags != INTERNAL && (s == (buf + 1) && (buf[0] == 'A' || buf[0] == 'a') || s == (buf + 4) && (buf[0] == 'L' || buf[0] == 'l') && (buf[1] == 'I' || buf[1] == 'i') && (buf[2] == 'N' || buf[2] == 'n') && (buf[3] == 'K' || buf[3] == 'k')))
553
{
554
s = buf;
555
r = a = 0;
556
f = 0;
557
for (;;)
558
{
559
switch (c = sfgetc(ip))
560
{
561
case EOF:
562
return;
563
case '\'':
564
case '"':
565
if (q == c)
566
q = 0;
567
else if (q == 0)
568
q = c;
569
else if (r == HIT)
570
STUFF(s, buf, c);
571
continue;
572
case '>':
573
case ' ':
574
case '\t':
575
case '\n':
576
if (!q)
577
{
578
if (r == HIT)
579
{
580
/*UNDENT...*/
581
582
*s = 0;
583
s = buf;
584
if (!a)
585
f = add(state, s, flags, path, prefix, ref);
586
else if (f)
587
{
588
p = f->name;
589
if (!strcasecmp(s, "data-root"))
590
rootdir(state, &state->dataroot, p, buf, sizeof(buf));
591
else if (!strcasecmp(s, "document-root"))
592
rootdir(state, &state->documentroot, p, buf, sizeof(buf));
593
else if (!strcasecmp(s, "host") || !strcasecmp(s, "hosts"))
594
{
595
if (!state->hosts.size && (state->hosts.size = strlen(p)) && !(state->hosts.data = strdup(p)))
596
error(ERROR_SYSTEM|3, "out of space [hosts]");
597
}
598
else if (!strcasecmp(s, "program-root"))
599
rootdir(state, &state->programroot, p, buf, sizeof(buf));
600
else if ((secure = strcasecmp(s, "secure") ? 0 : SECURE) || !strcasecmp(s, "dynamic"))
601
{
602
FTS* fts;
603
FTSENT* ent;
604
605
if (t = strrchr(p, '/'))
606
*t = 0;
607
fts = fts_open((char**)p, FTS_ONEPATH|FTS_META|FTS_PHYSICAL|FTS_NOPOSTORDER, order);
608
if (t)
609
*t = '/';
610
if (fts)
611
{
612
while (ent = scan(state, fts))
613
add(state, ent->fts_path + prefix, flags|secure, f->name, prefix, f);
614
if (fts_close(fts))
615
error(ERROR_SYSTEM|2, "%s: directory read error", p);
616
}
617
}
618
else if (!strcasecmp(s, "ignore") && (v = &state->ignore) || state->external && !strcasecmp(s, "internal") && (v = &state->internal))
619
{
620
if (state->copy.size)
621
{
622
s = state->copy.data;
623
p += state->root.size;
624
}
625
else
626
s = "";
627
if (t = strrchr(p, '/'))
628
*t = 0;
629
m = v->size + strlen(s) + strlen(p) + 6;
630
if (!(v->data = newof(v->data, char, m, 0)))
631
error(ERROR_SYSTEM|3, "out of space [path pattern]");
632
v->size += sfsprintf(v->data + v->size, m, "%s%s%s?(/*)", v->size ? "|" : "", s, p);
633
if (t)
634
*t = '/';
635
}
636
}
637
638
/*...INDENT*/
639
}
640
if (c == '>')
641
break;
642
r = a = 0;
643
}
644
else if (r == HIT)
645
STUFF(s, buf, c);
646
continue;
647
case '#':
648
case '?':
649
if (r == HIT)
650
STUFF(s, buf, 0);
651
continue;
652
case 'H':
653
case 'h':
654
if (r == HIT)
655
STUFF(s, buf, c);
656
else if (!q)
657
r = (r == 0) ? 1 : MISS;
658
continue;
659
case 'R':
660
case 'r':
661
if (r == HIT)
662
STUFF(s, buf, c);
663
else if (!q)
664
{
665
if (r == 0)
666
{
667
a = 10;
668
r = a + 1;
669
}
670
r = (r == (a + 1)) ? (a + 2) : MISS;
671
}
672
continue;
673
case 'E':
674
case 'e':
675
if (r == HIT)
676
STUFF(s, buf, c);
677
else if (!q)
678
r = (r == (a + 2)) ? (a + 3) : MISS;
679
continue;
680
case 'F':
681
case 'f':
682
if (r == HIT)
683
STUFF(s, buf, c);
684
else if (!q)
685
r = (r == 3) ? 4 : MISS;
686
continue;
687
case 'L':
688
case 'l':
689
if (r == HIT)
690
STUFF(s, buf, c);
691
else if (!q)
692
r = (r == (a + 3)) ? (a + 4) : MISS;
693
continue;
694
case '=':
695
if (r == HIT)
696
STUFF(s, buf, c);
697
else if (!q)
698
r = (r == (a + 4)) ? HIT : MISS;
699
continue;
700
default:
701
if (r == HIT)
702
STUFF(s, buf, c);
703
continue;
704
}
705
break;
706
}
707
}
708
else if (flags != INTERNAL && (s == (buf + 5) && (buf[0] == 'F' || buf[0] == 'f') && (buf[1] == 'R' || buf[1] == 'r') && (buf[2] == 'A' || buf[2] == 'a') && (buf[3] == 'M' || buf[3] == 'm') && (buf[4] == 'E' || buf[4] == 'e') || s == (buf + 3) && (buf[0] == 'I' || buf[0] == 'i') && (buf[1] == 'M' || buf[1] == 'm') && (buf[2] == 'G' || buf[2] == 'g') || s == (buf + 6) && (buf[0] == 'S' || buf[0] == 's') && (buf[1] == 'C' || buf[1] == 'c') && (buf[2] == 'R' || buf[2] == 'r') && (buf[3] == 'I' || buf[3] == 'i') && (buf[4] == 'P' || buf[4] == 'p') && (buf[5] == 'T' || buf[5] == 't')))
709
{
710
s = buf;
711
r = 0;
712
for (;;)
713
{
714
switch (c = sfgetc(ip))
715
{
716
case EOF:
717
return;
718
case '\'':
719
case '"':
720
if (q == c)
721
q = 0;
722
else if (q == 0)
723
q = c;
724
else if (r == HIT)
725
STUFF(s, buf, c);
726
continue;
727
case '>':
728
case ' ':
729
case '\t':
730
case '\n':
731
if (!q)
732
{
733
if (r == HIT)
734
{
735
*s = 0;
736
s = buf;
737
add(state, s, flags, path, prefix, ref);
738
}
739
if (c == '>')
740
break;
741
r = 0;
742
}
743
else if (r == HIT)
744
STUFF(s, buf, c);
745
continue;
746
case 'S':
747
case 's':
748
if (r == HIT)
749
STUFF(s, buf, c);
750
else if (!q)
751
r = (r == 0) ? 1 : MISS;
752
continue;
753
case 'R':
754
case 'r':
755
if (r == HIT)
756
STUFF(s, buf, c);
757
else if (!q)
758
r = (r == 1) ? 2 : MISS;
759
continue;
760
case 'C':
761
case 'c':
762
if (r == HIT)
763
STUFF(s, buf, c);
764
else if (!q)
765
r = (r == 2) ? 3 : MISS;
766
continue;
767
case '=':
768
if (r == HIT)
769
STUFF(s, buf, c);
770
else if (!q)
771
r = (r == 3) ? HIT : MISS;
772
continue;
773
default:
774
if (r == HIT)
775
STUFF(s, buf, c);
776
continue;
777
}
778
break;
779
}
780
}
781
else
782
{
783
if (state->external)
784
{
785
if (flags == EXTERNAL)
786
{
787
if (s == (buf + sizeof(internal) - 3) && strneq(buf, internal + 1, sizeof(internal) - 3))
788
{
789
flags = INTERNAL;
790
ref->flags |= FILTER;
791
}
792
}
793
else
794
{
795
if (s == (buf + sizeof(external) - 3) && strneq(buf, external + 1, sizeof(external) - 3))
796
flags = EXTERNAL;
797
}
798
}
799
for (;;)
800
{
801
switch (c = sfgetc(ip))
802
{
803
case EOF:
804
return;
805
case '\'':
806
case '"':
807
if (q == c)
808
q = 0;
809
else if (q == 0)
810
q = c;
811
continue;
812
case '>':
813
if (q == 0)
814
break;
815
continue;
816
default:
817
continue;
818
}
819
break;
820
}
821
}
822
continue;
823
case '[':
824
if (perlwarn && (c = sfgetc(ip)) != EOF)
825
{
826
sfungetc(ip, c);
827
switch (c)
828
{
829
case '-':
830
case '+':
831
case '!':
832
case '$':
833
case '*':
834
case '#':
835
error(1, "%s: file contains embedded perl constructs", path);
836
perlwarn = 0;
837
break;
838
}
839
}
840
continue;
841
default:
842
if ((iscntrl(c) || !isprint(c)) && !isspace(c))
843
break;
844
continue;
845
}
846
break;
847
}
848
}
849
850
/*
851
* filter out internal text
852
* return: <0:error 0:drop >0:keep
853
*/
854
855
static int
856
filter(register State_t* state, register Sfio_t* ip, Sfio_t* op)
857
{
858
register char* s;
859
register size_t n;
860
register size_t lines = 0;
861
register int head = 1;
862
863
for (;;)
864
{
865
if (!(s = sfgetr(ip, '\n', head)))
866
break;
867
if ((n = sfvalue(ip)) != sizeof(internal) || !strneq(s, internal, sizeof(internal) - 1))
868
{
869
if (head)
870
sfputr(op, s, '\n');
871
else
872
sfwrite(op, s, n);
873
lines++;
874
if (head && strgrpmatch(s, "</HEAD>", NiL, 0, STR_ICASE))
875
head = 0;
876
}
877
else
878
{
879
while ((s = sfgetr(ip, '\n', 0)) && (sfvalue(ip) != sizeof(external) || !strneq(s, external, sizeof(external) - 1)));
880
if (!s)
881
{
882
if (head)
883
return 0;
884
break;
885
}
886
}
887
}
888
if (sfvalue(ip) && (s = sfgetr(ip, -1, 0)) && (n = sfvalue(ip)))
889
sfwrite(op, s, n);
890
return lines > 1;
891
}
892
893
int
894
main(int argc, char** argv)
895
{
896
register char* s;
897
register char* p;
898
register Sfio_t* ip;
899
register State_t* state;
900
register File_t* fp;
901
register List_t* lp;
902
FTS* fts;
903
FTSENT* ent;
904
struct passwd* pwd;
905
Sfio_t* op;
906
char* dirs[4];
907
int i;
908
int n;
909
struct stat st;
910
struct stat ts;
911
912
static const char* www[] = { 0, 0, "wwwfiles", "public_html" };
913
914
NoP(argc);
915
error_info.id = "htmlrefs";
916
if (!(state = newof(0, State_t, 1, 0)))
917
error(ERROR_SYSTEM|3, "out of space [state]");
918
state->disc.key = offsetof(File_t, name);
919
state->disc.size = 0;
920
if (!(state->files = dtopen(&state->disc, Dtoset)))
921
error(ERROR_SYSTEM|3, "out of space [dict]");
922
state->exec = 1;
923
state->perlwarn = 1;
924
for (;;)
925
{
926
switch (optget(argv, usage))
927
{
928
case 'a':
929
state->all = opt_info.num;
930
continue;
931
case 'c':
932
state->copy.size = strlen(state->copy.data = opt_info.arg);
933
continue;
934
case 'd':
935
state->dependents = opt_info.num;
936
continue;
937
case 'e':
938
state->external = opt_info.num;
939
continue;
940
case 'F':
941
state->force = opt_info.num;
942
continue;
943
case 'h':
944
state->hosts.size = strlen(state->hosts.data = opt_info.arg);
945
continue;
946
case 'i':
947
state->index.size = strlen(state->index.data = opt_info.arg);
948
continue;
949
case 'k':
950
state->keep.size = strlen(state->keep.data = opt_info.arg);
951
continue;
952
case 'K':
953
state->skip.size = strlen(state->skip.data = opt_info.arg);
954
continue;
955
case 'l':
956
state->limit.size = strlen(state->limit.data = opt_info.arg);
957
continue;
958
case 'm':
959
state->missing = opt_info.num ? MISSING : 0;
960
continue;
961
case 'n':
962
state->exec = opt_info.num;
963
continue;
964
case 'r':
965
state->root.size = strlen(state->root.data = opt_info.arg);
966
continue;
967
case 's':
968
state->strict = opt_info.num;
969
continue;
970
case 'u':
971
state->user.size = strlen(state->user.data = opt_info.arg);
972
continue;
973
case 'v':
974
state->verbose = opt_info.num;
975
continue;
976
case 'w':
977
state->warn = opt_info.num;
978
continue;
979
case 'x':
980
state->unreferenced = opt_info.num;
981
continue;
982
case 'S':
983
state->symlink = opt_info.num;
984
case 'X':
985
state->remove = opt_info.num;
986
continue;
987
case '?':
988
error(ERROR_USAGE|4, "%s", opt_info.arg);
989
continue;
990
case ':':
991
error(2, "%s", opt_info.arg);
992
continue;
993
}
994
break;
995
}
996
argv += opt_info.index;
997
if (error_info.errors)
998
error(ERROR_USAGE|4, "%s", optusage(NiL));
999
if (state->copy.size && (stat(state->copy.data, &st) || !S_ISDIR(st.st_mode)))
1000
error(ERROR_SYSTEM|3, "%s: not a directory", state->copy.data);
1001
if (!state->index.size)
1002
state->index.size = strlen(state->index.data = INDEX);
1003
if (!state->keep.size)
1004
state->keep.size = strlen(state->keep.data = KEEP);
1005
if (!state->skip.size)
1006
state->skip.size = strlen(state->skip.data = SKIP);
1007
if (!state->user.size)
1008
state->user.size = strlen(state->user.data = fmtuid(geteuid()));
1009
if (!state->root.size || *state->root.data != '/')
1010
{
1011
www[0] = (const char*)state->index.data;
1012
if (state->root.size)
1013
www[1] = (const char*)state->root.data;
1014
if (!(pwd = getpwnam(state->user.data)))
1015
error(3, "%s: unknown user", state->user.data);
1016
s = pwd->pw_dir;
1017
for (i = 0; i < elementsof(www); i++)
1018
if (www[i])
1019
{
1020
n = sfsprintf(state->buf, sizeof(state->buf) - 1, "%s/%s", s, www[i]);
1021
if (!access(state->buf, F_OK))
1022
{
1023
if (i == 0)
1024
n = strlen(s);
1025
else
1026
s = state->buf;
1027
if (!(state->root.data = strdup(s)))
1028
error(ERROR_SYSTEM|3, "out of space [root]");
1029
state->root.size = n;
1030
break;
1031
}
1032
}
1033
}
1034
while (s = *argv++)
1035
add(state, s, EXTERNAL|VERBOSE, NiL, 0, NiL);
1036
if (!state->more)
1037
{
1038
sfsprintf(state->buf, sizeof(state->buf) - 1, "%s/%s", state->root.data, state->index.data);
1039
add(state, state->buf, EXTERNAL|VERBOSE, NiL, 0, NiL);
1040
}
1041
while (state->more)
1042
{
1043
state->more = 0;
1044
for (fp = (File_t*)dtfirst(state->files); fp; fp = (File_t*)dtnext(state->files, fp))
1045
{
1046
if (!(fp->flags & SCANNED))
1047
{
1048
fp->flags |= SCANNED;
1049
if (streq(fp->name, "-") || streq(fp->name, "/dev/stdin") || streq(fp->name, "/dev/fd/0"))
1050
ip = sfstdin;
1051
else if (!(ip = sfopen(NiL, fp->name, "r")))
1052
{
1053
fp->flags |= MISSING;
1054
if (state->warn || (fp->flags & VERBOSE))
1055
error(ERROR_SYSTEM|2, "%s: cannot read", fp->name);
1056
continue;
1057
}
1058
refs(state, fp->name, ip, fp);
1059
if (ip != sfstdin)
1060
sfclose(ip);
1061
}
1062
}
1063
}
1064
if (state->copy.size)
1065
{
1066
p = state->buf;
1067
for (fp = (File_t*)dtfirst(state->files); fp; fp = (File_t*)dtnext(state->files, fp))
1068
if (!(fp->flags & (CHECKED|COPIED|MISSING)))
1069
{
1070
fp->flags |= CHECKED;
1071
sfsprintf(p, sizeof(state->buf) - 1, "%s%s", state->copy.data, fp->name + state->root.size);
1072
if (state->internal.size && strmatch(p, state->internal.data))
1073
continue;
1074
add(state, p, COPIED, NiL, 0, NiL);
1075
if (stat(fp->name, &st))
1076
error(ERROR_SYSTEM|3, "%s: cannot stat", fp->name);
1077
if (state->limit.size && !strmatch(p, state->limit.data))
1078
continue;
1079
if (stat(p, &ts))
1080
{
1081
ts.st_mtime = 0;
1082
ts.st_mode = 0;
1083
}
1084
if (strmatch(p, "*/cgi-bin/*|*.cgi|*.html"))
1085
fp->flags |= COPY;
1086
if (!state->exec)
1087
{
1088
if (fp->flags & DIRECTORY)
1089
{
1090
if (!ts.st_mtime)
1091
sfprintf(sfstdout, " mkdir %s\n", p);
1092
}
1093
else if (state->force || st.st_mtime != ts.st_mtime)
1094
{
1095
if (fp->flags & FILTER)
1096
sfprintf(sfstdout, "filter %s\n", p);
1097
else if (state->symlink && !(fp->flags & COPY))
1098
sfprintf(sfstdout, " link %s\n", p);
1099
else
1100
sfprintf(sfstdout, " copy %s\n", p);
1101
}
1102
}
1103
else if (fp->flags & DIRECTORY)
1104
{
1105
if (!ts.st_mtime)
1106
{
1107
if (state->verbose)
1108
sfprintf(sfstdout, " mkdir %s\n", p);
1109
if (mkdir(p, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH))
1110
error(ERROR_SYSTEM|2, "%s: cannot create directory", p);
1111
}
1112
}
1113
else if (state->symlink && !(fp->flags & (COPY|FILTER)))
1114
{
1115
if (st.st_mtime != ts.st_mtime)
1116
{
1117
if (state->verbose)
1118
sfprintf(sfstdout, " ln -s %s %s\n", fp->name, p);
1119
if (ts.st_mtime)
1120
remove(p);
1121
if (symlink(fp->name, p))
1122
error(ERROR_SYSTEM|2, "%s: cannot symlink to %s", fp->name, p);
1123
}
1124
}
1125
else if (state->force || st.st_mtime != ts.st_mtime)
1126
{
1127
if (!(ip = sfopen(NiL, fp->name, "r")))
1128
error(ERROR_SYSTEM|2, "%s: cannot read", fp->name);
1129
else if (!(op = sfopen(NiL, p, "w")))
1130
{
1131
error(ERROR_SYSTEM|2, "%s: cannot write", p);
1132
sfclose(ip);
1133
}
1134
else
1135
{
1136
if (fp->flags & FILTER)
1137
{
1138
if (state->verbose)
1139
sfprintf(sfstdout, "filter %s\n", p);
1140
n = filter(state, ip, op);
1141
}
1142
else
1143
{
1144
if (state->verbose)
1145
sfprintf(sfstdout, " copy %s\n", p);
1146
if (sfmove(ip, op, SF_UNBOUND, -1) >= 0 && sfeof(ip))
1147
n = 1;
1148
else
1149
n = -1;
1150
}
1151
if (n < 0)
1152
error(ERROR_SYSTEM|2, "%s: read error", fp->name);
1153
if (sfclose(op))
1154
error(ERROR_SYSTEM|2, "%s: write error", p);
1155
sfclose(ip);
1156
if (n > 0)
1157
{
1158
if ((st.st_mode &= S_IPERM) != (ts.st_mode &= S_IPERM) && chmod(p, st.st_mode))
1159
error(ERROR_SYSTEM|2, "%s: cannot set mode", p);
1160
if (touch(p, st.st_mtime, st.st_mtime, 0))
1161
error(ERROR_SYSTEM|2, "%s: cannot set times", p);
1162
}
1163
else if (!n)
1164
{
1165
if (state->verbose)
1166
sfprintf(sfstdout, " %s %s\n", (fp->flags & DIRECTORY) ? "rmdir" : " rm", p);
1167
if (((fp->flags & DIRECTORY) ? rmdir : remove)(p))
1168
error(ERROR_SYSTEM|2, "%s: cannot remove", p);
1169
}
1170
}
1171
}
1172
}
1173
if (state->unreferenced)
1174
{
1175
if (!(fts = fts_open((char**)state->copy.data, FTS_ONEPATH|FTS_META|FTS_PHYSICAL|FTS_NOPREORDER, order)))
1176
error(ERROR_SYSTEM|3, "%s: cannot search directory", state->copy.data);
1177
while (ent = scan(state, fts))
1178
if ((!(fp = dtmatch(state->files, ent->fts_path)) || !(fp->flags & COPIED)) && (!state->ignore.size || !strmatch(ent->fts_path, state->ignore.data)) && (!state->limit.size || strmatch(ent->fts_path, state->limit.data)))
1179
{
1180
if (state->verbose || !state->exec)
1181
sfprintf(sfstdout, " %s %s\n", (ent->fts_info & FTS_D) ? "rmdir" : " rm", ent->fts_path);
1182
if (state->exec && ((ent->fts_info & FTS_D) ? rmdir : remove)(ent->fts_path))
1183
error(ERROR_SYSTEM|2, "%s: cannot remove", ent->fts_path);
1184
}
1185
if (fts_close(fts))
1186
error(ERROR_SYSTEM|3, "%s: directory read error", state->copy.data);
1187
}
1188
}
1189
else if (state->unreferenced)
1190
{
1191
i = 0;
1192
if (state->documentroot.data)
1193
dirs[i++] = state->documentroot.data;
1194
else
1195
{
1196
if (!state->root.data)
1197
state->root.size = strlen(state->root.data = ".");
1198
dirs[i++] = state->root.data;
1199
}
1200
if (state->dataroot.data)
1201
dirs[i++] = state->dataroot.data;
1202
if (state->programroot.data)
1203
dirs[i++] = state->programroot.data;
1204
dirs[i] = 0;
1205
if (!(fts = fts_open(dirs, FTS_META|FTS_PHYSICAL|FTS_NOPREORDER, order)))
1206
error(ERROR_SYSTEM|3, "%s: cannot search directory", state->root.data);
1207
while (ent = scan(state, fts))
1208
if (!dtmatch(state->files, ent->fts_path) && (!strmatch(ent->fts_name, state->keep.data) || state->skip.size && strmatch(ent->fts_name, state->skip.data) || state->ignore.size && strmatch(ent->fts_path, state->ignore.data)))
1209
{
1210
if (state->strict || !streq(ent->fts_name, state->index.data))
1211
{
1212
if (!state->remove)
1213
sfprintf(sfstdout, "%s\n", fmtquote(ent->fts_path, "\"", "\"", ent->fts_pathlen, 0));
1214
else if (!state->limit.size || strmatch(ent->fts_path, state->limit.data))
1215
{
1216
if (state->verbose || !state->exec)
1217
sfprintf(sfstdout, " %s %s\n", (ent->fts_info & FTS_D) ? "rmdir" : " rm", ent->fts_path);
1218
if (state->exec && ((ent->fts_info & FTS_D) ? rmdir : remove)(ent->fts_path))
1219
error(ERROR_SYSTEM|2, "%s: cannot remove", ent->fts_path);
1220
}
1221
}
1222
else if (s = strrchr(ent->fts_path, '/'))
1223
{
1224
*s = 0;
1225
add(state, ent->fts_path, COPIED, NiL, 0, NiL);
1226
*s = '/';
1227
}
1228
}
1229
if (fts_close(fts))
1230
error(ERROR_SYSTEM|3, "%s: directory read error", state->root.data);
1231
}
1232
else
1233
{
1234
for (fp = (File_t*)dtfirst(state->files); fp; fp = (File_t*)dtnext(state->files, fp))
1235
if (state->all || (fp->flags & MISSING) == state->missing)
1236
{
1237
sfprintf(sfstdout, "%s", fmtquote(fp->name, "\"", "\"", strlen(fp->name), 0));
1238
if (state->dependents && fp->refs)
1239
{
1240
sfputc(sfstdout, ' ');
1241
sfputc(sfstdout, ':');
1242
for (lp = fp->refs; lp; lp = lp->next)
1243
sfprintf(sfstdout, " %s", fmtquote(lp->file->name, "\"", "\"", strlen(lp->file->name), 0));
1244
}
1245
sfputc(sfstdout, '\n');
1246
}
1247
}
1248
return error_info.errors != 0;
1249
}
1250
1251