Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/misc/fastfind.c
1810 views
1
#pragma prototyped
2
/*
3
* original code
4
*
5
* James A. Woods, Informatics General Corporation,
6
* NASA Ames Research Center, 6/81.
7
* Usenix ;login:, February/March, 1983, p. 8.
8
*
9
* discipline/method interface
10
*
11
* Glenn Fowler
12
* AT&T Research
13
* modified from the original BSD source
14
*
15
* 'fastfind' scans a file list for the full pathname of a file
16
* given only a piece of the name. The list is processed with
17
* with "front-compression" and bigram coding. Front compression reduces
18
* space by a factor of 4-5, bigram coding by a further 20-25%.
19
*
20
* there are 4 methods:
21
*
22
* FF_old original with 7 bit bigram encoding (no magic)
23
* FF_gnu 8 bit clean front compression (FF_gnu_magic)
24
* FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
25
* FF_typ FF_dir with (mime) types (FF_typ_magic)
26
*
27
* the bigram encoding steals the eighth bit (that's why its FF_old)
28
* maybe one day we'll limit it to readonly:
29
*
30
* 0-2*FF_OFF likeliest differential counts + offset to make nonnegative
31
* FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows
32
* FF_MIN-FF_MAX ascii residue
33
* >=FF_MAX bigram codes
34
*
35
* a two-tiered string search technique is employed
36
*
37
* a metacharacter-free subpattern and partial pathname is matched
38
* backwards to avoid full expansion of the pathname list
39
*
40
* then the actual shell glob-style regular expression (if in this form)
41
* is matched against the candidate pathnames using the slower regexec()
42
*
43
* The original BSD code is covered by the BSD license:
44
*
45
* Copyright (c) 1985, 1993, 1999
46
* The Regents of the University of California. All rights reserved.
47
*
48
* Redistribution and use in source and binary forms, with or without
49
* modification, are permitted provided that the following conditions
50
* are met:
51
* 1. Redistributions of source code must retain the above copyright
52
* notice, this list of conditions and the following disclaimer.
53
* 2. Redistributions in binary form must reproduce the above copyright
54
* notice, this list of conditions and the following disclaimer in the
55
* documentation and/or other materials provided with the distribution.
56
* 3. Neither the name of the University nor the names of its contributors
57
* may be used to endorse or promote products derived from this software
58
* without specific prior written permission.
59
*
60
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
61
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
63
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
64
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
65
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
66
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70
* SUCH DAMAGE.
71
*/
72
73
static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
74
75
static const char lib[] = "libast:fastfind";
76
77
#include "findlib.h"
78
79
#define FIND_MATCH "*/(find|locate)/*"
80
81
/*
82
* this db could be anywhere
83
* findcodes[] directories are checked for findnames[i]
84
*/
85
86
static char* findcodes[] =
87
{
88
0,
89
0,
90
FIND_CODES,
91
"/usr/local/share/lib",
92
"/usr/local/lib",
93
"/usr/share/lib",
94
"/usr/lib",
95
"/var/spool",
96
"/usr/local/var",
97
"/var/lib",
98
"/var/lib/slocate",
99
"/var/db",
100
};
101
102
static char* findnames[] =
103
{
104
"find/codes",
105
"find/find.codes",
106
"locate/locatedb",
107
"locatedb",
108
"locate.database",
109
"slocate.db",
110
};
111
112
/*
113
* convert t to lower case and drop leading x- and x- after /
114
* converted value copied to b of size n
115
*/
116
117
char*
118
typefix(char* buf, size_t n, register const char* t)
119
{
120
register int c;
121
register char* b = buf;
122
123
if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
124
t += 2;
125
while (c = *t++)
126
{
127
if (isupper(c))
128
c = tolower(c);
129
if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
130
t += 2;
131
}
132
*b = 0;
133
return buf;
134
}
135
136
/*
137
* return a fastfind stream handle for pattern
138
*/
139
140
Find_t*
141
findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
142
{
143
register Find_t* fp;
144
register char* p;
145
register char* s;
146
register char* b;
147
register int i;
148
register int j;
149
char* path;
150
int brace = 0;
151
int paren = 0;
152
int k;
153
int q;
154
int fd;
155
int uid;
156
Vmalloc_t* vm;
157
Type_t* tp;
158
struct stat st;
159
160
161
if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
162
goto nospace;
163
164
/*
165
* NOTE: searching for FIND_CODES would be much simpler if we
166
* just stuck with our own, but we also support GNU
167
* locate codes and have to search for the one of a
168
* bazillion possible names for that file
169
*/
170
171
if (!findcodes[1])
172
findcodes[1] = getenv(FIND_CODES_ENV);
173
if (disc->flags & FIND_GENERATE)
174
{
175
if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
176
goto nospace;
177
fp->vm = vm;
178
fp->id = lib;
179
fp->disc = disc;
180
fp->generate = 1;
181
if (file && (!*file || streq(file, "-")))
182
file = 0;
183
uid = geteuid();
184
j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
185
186
/*
187
* look for the codes file, but since it may not exist yet,
188
* also look for the containing directory if i<2 or if
189
* it is sufficiently qualified (FIND_MATCH)
190
*/
191
192
for (i = 0; i < j; i++)
193
if (path = findcodes[i])
194
{
195
if (*path == '/')
196
{
197
if (!stat(path, &st))
198
{
199
if (S_ISDIR(st.st_mode))
200
{
201
for (k = 0; k < elementsof(findnames); k++)
202
{
203
sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
204
if (!eaccess(fp->encode.file, R_OK|W_OK))
205
{
206
path = fp->encode.file;
207
break;
208
}
209
if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
210
{
211
*b = 0;
212
if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
213
{
214
*b = '/';
215
path = fp->encode.file;
216
break;
217
}
218
}
219
}
220
if (k < elementsof(findnames))
221
break;
222
}
223
else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
224
{
225
sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
226
path = fp->encode.file;
227
break;
228
}
229
}
230
else if (i < 2 || strmatch(path, FIND_MATCH))
231
{
232
sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
233
if (b = strrchr(fp->encode.file, '/'))
234
{
235
*b = 0;
236
if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
237
{
238
*b = '/';
239
path = fp->encode.file;
240
break;
241
}
242
}
243
}
244
}
245
else if (pathpath(path, "", PATH_REGULAR|PATH_READ|PATH_WRITE, fp->encode.file, sizeof(fp->encode.file)))
246
{
247
path = fp->encode.file;
248
break;
249
}
250
else if (b = strrchr(path, '/'))
251
{
252
sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
253
if (pathpath(fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE, fp->encode.temp, sizeof(fp->encode.temp)) &&
254
!stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
255
{
256
sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
257
path = fp->encode.file;
258
break;
259
}
260
}
261
}
262
if (i >= j)
263
{
264
if (fp->disc->errorf)
265
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
266
goto drop;
267
}
268
if (fp->disc->flags & FIND_OLD)
269
{
270
/*
271
* FF_old generates temp data that is read
272
* in a second pass to generate the real codes
273
*/
274
275
fp->method = FF_old;
276
if (!(fp->fp = sftmp(32 * PATH_MAX)))
277
{
278
if (fp->disc->errorf)
279
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
280
goto drop;
281
}
282
}
283
else
284
{
285
/*
286
* the rest generate into a temp file that
287
* is simply renamed on completion
288
*/
289
290
if (s = strrchr(path, '/'))
291
{
292
*s = 0;
293
p = path;
294
}
295
else
296
p = ".";
297
if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
298
{
299
if (fp->disc->errorf)
300
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
301
goto drop;
302
}
303
if (s)
304
*s = '/';
305
if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
306
{
307
if (fp->disc->errorf)
308
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
309
close(fd);
310
goto drop;
311
}
312
if (fp->disc->flags & FIND_TYPE)
313
{
314
fp->method = FF_typ;
315
fp->encode.namedisc.key = offsetof(Type_t, name);
316
fp->encode.namedisc.link = offsetof(Type_t, byname);
317
fp->encode.indexdisc.key = offsetof(Type_t, index);
318
fp->encode.indexdisc.size = sizeof(unsigned long);
319
fp->encode.indexdisc.link = offsetof(Type_t, byindex);
320
s = "system/dir";
321
if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dtoset)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dtoset)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
322
{
323
if (fp->encode.namedict)
324
dtclose(fp->encode.namedict);
325
if (fp->disc->errorf)
326
(*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
327
goto drop;
328
}
329
330
/*
331
* type index 1 is always system/dir
332
*/
333
334
tp->index = ++fp->types;
335
strcpy(tp->name, s);
336
dtinsert(fp->encode.namedict, tp);
337
dtinsert(fp->encode.indexdict, tp);
338
}
339
else if (fp->disc->flags & FIND_GNU)
340
{
341
fp->method = FF_gnu;
342
sfputc(fp->fp, 0);
343
sfputr(fp->fp, FF_gnu_magic, 0);
344
}
345
else
346
{
347
fp->method = FF_dir;
348
sfputc(fp->fp, 0);
349
sfputr(fp->fp, FF_dir_magic, 0);
350
}
351
}
352
}
353
else
354
{
355
i = sizeof(Decode_t) + sizeof(Code_t);
356
if (!pattern || !*pattern)
357
pattern = "*";
358
i += (j = 2 * (strlen(pattern) + 1));
359
if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
360
{
361
vmclose(vm);
362
return 0;
363
}
364
fp->vm = vm;
365
fp->id = lib;
366
fp->disc = disc;
367
if (disc->flags & FIND_ICASE)
368
fp->decode.ignorecase = 1;
369
j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
370
for (i = 0; i < j; i++)
371
if (path = findcodes[i])
372
{
373
if (*path == '/')
374
{
375
if (!stat(path, &st))
376
{
377
if (S_ISDIR(st.st_mode))
378
{
379
for (k = 0; k < elementsof(findnames); k++)
380
{
381
sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
382
if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
383
{
384
path = fp->decode.path;
385
break;
386
}
387
}
388
if (fp->fp)
389
break;
390
}
391
else if (fp->fp = sfopen(NiL, path, "r"))
392
break;
393
}
394
}
395
else if ((path = pathpath(path, "", PATH_REGULAR|PATH_READ, fp->decode.path, sizeof(fp->decode.path))) && (fp->fp = sfopen(NiL, path, "r")))
396
break;
397
}
398
if (!fp->fp)
399
{
400
if (fp->disc->errorf)
401
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
402
goto drop;
403
}
404
if (fstat(sffileno(fp->fp), &st))
405
{
406
if (fp->disc->errorf)
407
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
408
goto drop;
409
}
410
if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
411
setgid(getgid());
412
fp->stamp = st.st_mtime;
413
b = (s = fp->decode.temp) + 1;
414
for (i = 0; i < elementsof(fp->decode.bigram1); i++)
415
{
416
if ((j = sfgetc(fp->fp)) == EOF)
417
goto invalid;
418
if (!(*s++ = fp->decode.bigram1[i] = j) && i)
419
{
420
i = -i;
421
break;
422
}
423
if ((j = sfgetc(fp->fp)) == EOF)
424
goto invalid;
425
if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
426
break;
427
}
428
if (streq(b, FF_typ_magic))
429
{
430
if (type)
431
{
432
type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
433
memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
434
}
435
fp->method = FF_typ;
436
for (j = 0, i = 1;; i++)
437
{
438
if (!(s = sfgetr(fp->fp, 0, 0)))
439
goto invalid;
440
if (!*s)
441
break;
442
if (type && strmatch(s, type))
443
{
444
FF_SET_TYPE(fp, i);
445
j++;
446
}
447
}
448
if (type && !j)
449
goto drop;
450
fp->types = j;
451
}
452
else if (streq(b, FF_dir_magic))
453
fp->method = FF_dir;
454
else if (streq(b, FF_gnu_magic))
455
fp->method = FF_gnu;
456
else if (!*b && *--b >= '0' && *b <= '1')
457
{
458
fp->method = FF_gnu;
459
while (j = sfgetc(fp->fp))
460
{
461
if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
462
goto invalid;
463
fp->decode.path[fp->decode.count++] = j;
464
}
465
}
466
else
467
{
468
fp->method = FF_old;
469
if (i < 0)
470
{
471
if ((j = sfgetc(fp->fp)) == EOF)
472
goto invalid;
473
fp->decode.bigram2[i = -i] = j;
474
}
475
while (++i < elementsof(fp->decode.bigram1))
476
{
477
if ((j = sfgetc(fp->fp)) == EOF)
478
goto invalid;
479
fp->decode.bigram1[i] = j;
480
if ((j = sfgetc(fp->fp)) == EOF)
481
goto invalid;
482
fp->decode.bigram2[i] = j;
483
}
484
if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
485
goto invalid;
486
}
487
488
/*
489
* set up the physical dir table
490
*/
491
492
if (disc->version >= 19980301L)
493
{
494
fp->verifyf = disc->verifyf;
495
if (disc->dirs && *disc->dirs)
496
{
497
for (k = 0; disc->dirs[k]; k++);
498
if (k == 1 && streq(disc->dirs[0], "/"))
499
k = 0;
500
if (k)
501
{
502
if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
503
goto drop;
504
if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
505
goto drop;
506
p = 0;
507
b = fp->decode.temp;
508
j = fp->method == FF_old || fp->method == FF_gnu;
509
510
/*
511
* fill the dir list with logical and
512
* physical names since we don't know
513
* which way the db was encoded (it
514
* could be *both* ways)
515
*/
516
517
for (i = q = 0; i < k; i++)
518
{
519
if (*(s = disc->dirs[i]) == '/')
520
sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
521
else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
522
goto nospace;
523
else
524
sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
525
s = pathcanon(b, sizeof(fp->decode.temp), 0);
526
*s = '/';
527
*(s + 1) = 0;
528
if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
529
goto nospace;
530
if (j)
531
(fp->dirs[q])[s - b] = 0;
532
q++;
533
*s = 0;
534
s = pathcanon(b, sizeof(fp->decode.temp), PATH_PHYSICAL);
535
*s = '/';
536
*(s + 1) = 0;
537
if (!strneq(b, fp->dirs[q - 1], s - b))
538
{
539
if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
540
goto nospace;
541
if (j)
542
(fp->dirs[q])[s - b] = 0;
543
q++;
544
}
545
}
546
strsort(fp->dirs, q, strcasecmp);
547
for (i = 0; i < q; i++)
548
fp->lens[i] = strlen(fp->dirs[i]);
549
}
550
}
551
}
552
if (fp->verifyf || (disc->flags & FIND_VERIFY))
553
{
554
if (fp->method != FF_dir && fp->method != FF_typ)
555
{
556
if (fp->disc->errorf)
557
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
558
goto drop;
559
}
560
fp->verify = 1;
561
}
562
563
/*
564
* extract last glob-free subpattern in name for fast pre-match
565
* prepend 0 for backwards match
566
*/
567
568
if (p = s = (char*)pattern)
569
{
570
b = fp->decode.pattern;
571
for (;;)
572
{
573
switch (*b++ = *p++)
574
{
575
case 0:
576
break;
577
case '\\':
578
s = p;
579
if (!*p++)
580
break;
581
continue;
582
case '[':
583
if (!brace)
584
{
585
brace++;
586
if (*p == ']')
587
p++;
588
}
589
continue;
590
case ']':
591
if (brace)
592
{
593
brace--;
594
s = p;
595
}
596
continue;
597
case '(':
598
if (!brace)
599
paren++;
600
continue;
601
case ')':
602
if (!brace && paren > 0 && !--paren)
603
s = p;
604
continue;
605
case '|':
606
case '&':
607
if (!brace && !paren)
608
{
609
s = "";
610
break;
611
}
612
continue;
613
case '*':
614
case '?':
615
s = p;
616
continue;
617
default:
618
continue;
619
}
620
break;
621
}
622
if (s != pattern && !streq(pattern, "*"))
623
{
624
fp->decode.match = 1;
625
if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
626
{
627
if (disc->errorf)
628
{
629
regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
630
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
631
}
632
goto drop;
633
}
634
}
635
if (*s)
636
{
637
*b++ = 0;
638
while (i = *s++)
639
*b++ = i;
640
*b-- = 0;
641
fp->decode.end = b;
642
if (fp->decode.ignorecase)
643
for (s = fp->decode.pattern; s <= b; s++)
644
if (isupper(*s))
645
*s = tolower(*s);
646
}
647
}
648
}
649
return fp;
650
nospace:
651
if (disc->errorf)
652
(*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
653
if (!vm)
654
return 0;
655
if (!fp)
656
{
657
vmclose(vm);
658
return 0;
659
}
660
goto drop;
661
invalid:
662
if (fp->disc->errorf)
663
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
664
drop:
665
if (!fp->generate && fp->decode.match)
666
regfree(&fp->decode.re);
667
if (fp->fp)
668
sfclose(fp->fp);
669
vmclose(fp->vm);
670
return 0;
671
}
672
673
/*
674
* return the next fastfind path
675
* 0 returned when list exhausted
676
*/
677
678
char*
679
findread(register Find_t* fp)
680
{
681
register char* p;
682
register char* q;
683
register char* s;
684
register char* b;
685
register char* e;
686
register int c;
687
register int n;
688
register int m;
689
int ignorecase;
690
int t;
691
unsigned char w[4];
692
struct stat st;
693
694
if (fp->generate)
695
return 0;
696
if (fp->decode.restore)
697
{
698
*fp->decode.restore = '/';
699
fp->decode.restore = 0;
700
}
701
ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
702
c = fp->decode.peek;
703
next:
704
for (;;)
705
{
706
switch (fp->method)
707
{
708
case FF_dir:
709
t = 0;
710
n = sfgetl(fp->fp);
711
goto grab;
712
case FF_gnu:
713
if ((c = sfgetc(fp->fp)) == EOF)
714
return 0;
715
if (c == 0x80)
716
{
717
if ((c = sfgetc(fp->fp)) == EOF)
718
return 0;
719
n = c << 8;
720
if ((c = sfgetc(fp->fp)) == EOF)
721
return 0;
722
n |= c;
723
if (n & 0x8000)
724
n = (n - 0xffff) - 1;
725
}
726
else if ((n = c) & 0x80)
727
n = (n - 0xff) - 1;
728
t = 0;
729
goto grab;
730
case FF_typ:
731
t = sfgetu(fp->fp);
732
n = sfgetl(fp->fp);
733
grab:
734
p = fp->decode.path + (fp->decode.count += n);
735
do
736
{
737
if ((c = sfgetc(fp->fp)) == EOF)
738
return 0;
739
} while (*p++ = c);
740
p -= 2;
741
break;
742
case FF_old:
743
if (c == EOF)
744
{
745
fp->decode.peek = c;
746
return 0;
747
}
748
if (c == FF_ESC)
749
{
750
if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
751
return 0;
752
if (fp->decode.swap >= 0)
753
{
754
c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
755
if (!fp->decode.swap)
756
{
757
/*
758
* the old format uses machine
759
* byte order; this test uses
760
* the smallest magnitude of
761
* both byte orders on the
762
* first encoded path motion
763
* to determine the original
764
* byte order
765
*/
766
767
m = c;
768
if (m < 0)
769
m = -m;
770
n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
771
if (n < 0)
772
n = -n;
773
if (m < n)
774
fp->decode.swap = 1;
775
else
776
{
777
fp->decode.swap = -1;
778
c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
779
}
780
}
781
}
782
else
783
c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
784
}
785
fp->decode.count += c - FF_OFF;
786
for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
787
if (c & (1<<(CHAR_BIT-1)))
788
{
789
*p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
790
*p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
791
}
792
else
793
*p++ = c;
794
*p-- = 0;
795
t = 0;
796
break;
797
}
798
b = fp->decode.path;
799
if (fp->decode.found)
800
fp->decode.found = 0;
801
else
802
b += fp->decode.count;
803
if (fp->dirs)
804
for (;;)
805
{
806
if (!*fp->dirs)
807
return 0;
808
809
/*
810
* use the ordering and lengths to prune
811
* comparison function calls
812
* (*fp->dirs)[*fp->lens]=='/' if its
813
* already been matched
814
*/
815
816
if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
817
{
818
if (!(*fp->dirs)[m])
819
goto next;
820
if (!strncasecmp(*fp->dirs, fp->decode.path, m))
821
break;
822
}
823
else if (n == m)
824
{
825
if (!(*fp->dirs)[m])
826
{
827
if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
828
{
829
if (m > 0)
830
{
831
(*fp->dirs)[m] = '/';
832
if ((*fp->dirs)[m - 1] != '/')
833
(*fp->dirs)[++(*fp->lens)] = '/';
834
}
835
break;
836
}
837
if (n >= 0)
838
goto next;
839
}
840
}
841
else if (!(*fp->dirs)[m])
842
goto next;
843
fp->dirs++;
844
fp->lens++;
845
}
846
if (fp->verify && (*p == '/' || t == 1))
847
{
848
if ((n = p - fp->decode.path))
849
*p = 0;
850
else
851
n = 1;
852
if (fp->verifyf)
853
n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
854
else if (stat(fp->decode.path, &st))
855
n = -1;
856
else if ((unsigned long)st.st_mtime > fp->stamp)
857
n = 1;
858
else
859
n = 0;
860
*p = '/';
861
862
/*
863
* n<0 skip this subtree
864
* n==0 keep as is
865
* n>0 read this dir now
866
*/
867
868
/* NOT IMPLEMENTED YET */
869
}
870
if (FF_OK_TYPE(fp, t))
871
{
872
if (fp->decode.end)
873
{
874
if (*(s = p) == '/')
875
s--;
876
if (*fp->decode.pattern == '/' && b > fp->decode.path)
877
b--;
878
for (; s >= b; s--)
879
if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
880
{
881
if (ignorecase)
882
for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
883
else
884
for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
885
if (!*e)
886
{
887
fp->decode.found = 1;
888
if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
889
{
890
fp->decode.peek = c;
891
if (*p == '/')
892
*(fp->decode.restore = p) = 0;
893
if (!fp->secure || !access(fp->decode.path, F_OK))
894
return fp->decode.path;
895
}
896
break;
897
}
898
}
899
}
900
else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
901
{
902
fp->decode.peek = c;
903
if (*p == '/' && p > fp->decode.path)
904
*(fp->decode.restore = p) = 0;
905
if (!fp->secure || !access(fp->decode.path, F_OK))
906
return fp->decode.path;
907
}
908
else if (n != REG_NOMATCH)
909
{
910
if (fp->disc->errorf)
911
{
912
regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
913
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
914
}
915
return 0;
916
}
917
}
918
}
919
}
920
921
/*
922
* add path to the code table
923
* paths are assumed to be in sort order
924
*/
925
926
int
927
findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
928
{
929
register unsigned char* s;
930
register unsigned char* e;
931
register unsigned char* p;
932
register int n;
933
register int d;
934
register Type_t* x;
935
register unsigned long u;
936
937
if (!fp->generate)
938
return -1;
939
if (type && fp->method == FF_dir)
940
{
941
len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
942
path = fp->encode.mark;
943
}
944
s = (unsigned char*)path;
945
if (len <= 0)
946
len = strlen(path);
947
if (len < sizeof(fp->encode.path))
948
e = s + len++;
949
else
950
{
951
len = sizeof(fp->encode.path) - 1;
952
e = s + len;
953
}
954
p = (unsigned char*)fp->encode.path;
955
while (s < e)
956
{
957
if (*s != *p++)
958
break;
959
s++;
960
}
961
n = s - (unsigned char*)path;
962
switch (fp->method)
963
{
964
case FF_gnu:
965
d = n - fp->encode.prefix;
966
if (d >= -127 && d <= 127)
967
sfputc(fp->fp, d & 0xff);
968
else
969
{
970
sfputc(fp->fp, 0x80);
971
sfputc(fp->fp, (d >> 8) & 0xff);
972
sfputc(fp->fp, d & 0xff);
973
}
974
fp->encode.prefix = n;
975
sfputr(fp->fp, (char*)s, 0);
976
break;
977
case FF_old:
978
sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
979
fp->encode.prefix = n;
980
sfputc(fp->fp, ' ');
981
p = s;
982
while (s < e)
983
{
984
n = *s++;
985
if (s >= e)
986
break;
987
fp->encode.code[n][*s++]++;
988
}
989
while (p < e)
990
{
991
if ((n = *p++) < FF_MIN || n >= FF_MAX)
992
n = '?';
993
sfputc(fp->fp, n);
994
}
995
sfputc(fp->fp, 0);
996
break;
997
case FF_typ:
998
if (type)
999
{
1000
type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1001
if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1002
u = x->index;
1003
else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1004
u = 0;
1005
else
1006
{
1007
u = x->index = ++fp->types;
1008
strcpy(x->name, type);
1009
dtinsert(fp->encode.namedict, x);
1010
dtinsert(fp->encode.indexdict, x);
1011
}
1012
}
1013
else
1014
u = 0;
1015
sfputu(fp->fp, u);
1016
/*FALLTHROUGH...*/
1017
case FF_dir:
1018
d = n - fp->encode.prefix;
1019
sfputl(fp->fp, d);
1020
fp->encode.prefix = n;
1021
sfputr(fp->fp, (char*)s, 0);
1022
break;
1023
}
1024
memcpy(fp->encode.path, path, len);
1025
return 0;
1026
}
1027
1028
/*
1029
* findsync() helper
1030
*/
1031
1032
static int
1033
finddone(register Find_t* fp)
1034
{
1035
int r;
1036
1037
if (sfsync(fp->fp))
1038
{
1039
if (fp->disc->errorf)
1040
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1041
return -1;
1042
}
1043
if (sferror(fp->fp))
1044
{
1045
if (fp->disc->errorf)
1046
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1047
return -1;
1048
}
1049
r = sfclose(fp->fp);
1050
fp->fp = 0;
1051
if (r)
1052
{
1053
if (fp->disc->errorf)
1054
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1055
return -1;
1056
}
1057
return 0;
1058
}
1059
1060
/*
1061
* finish the code table
1062
*/
1063
1064
static int
1065
findsync(register Find_t* fp)
1066
{
1067
register char* s;
1068
register int n;
1069
register int m;
1070
register int d;
1071
register Type_t* x;
1072
char* t;
1073
int b;
1074
long z;
1075
Sfio_t* sp;
1076
1077
switch (fp->method)
1078
{
1079
case FF_dir:
1080
case FF_gnu:
1081
/*
1082
* replace the real file with the temp file
1083
*/
1084
1085
if (finddone(fp))
1086
goto bad;
1087
remove(fp->encode.file);
1088
if (rename(fp->encode.temp, fp->encode.file))
1089
{
1090
if (fp->disc->errorf)
1091
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1092
remove(fp->encode.temp);
1093
return -1;
1094
}
1095
break;
1096
case FF_old:
1097
/*
1098
* determine the top FF_MAX bigrams
1099
*/
1100
1101
for (n = 0; n < FF_MAX; n++)
1102
for (m = 0; m < FF_MAX; m++)
1103
fp->encode.hits[fp->encode.code[n][m]]++;
1104
fp->encode.hits[0] = 0;
1105
m = 1;
1106
for (n = USHRT_MAX; n >= 0; n--)
1107
if (d = fp->encode.hits[n])
1108
{
1109
fp->encode.hits[n] = m;
1110
if ((m += d) > FF_MAX)
1111
break;
1112
}
1113
while (--n >= 0)
1114
fp->encode.hits[n] = 0;
1115
for (n = FF_MAX - 1; n >= 0; n--)
1116
for (m = FF_MAX - 1; m >= 0; m--)
1117
if (fp->encode.hits[fp->encode.code[n][m]])
1118
{
1119
d = fp->encode.code[n][m];
1120
b = fp->encode.hits[d] - 1;
1121
fp->encode.code[n][m] = b + FF_MAX;
1122
if (fp->encode.hits[d]++ >= FF_MAX)
1123
fp->encode.hits[d] = 0;
1124
fp->encode.bigram[b *= 2] = n;
1125
fp->encode.bigram[b + 1] = m;
1126
}
1127
else
1128
fp->encode.code[n][m] = 0;
1129
1130
/*
1131
* commit the real file
1132
*/
1133
1134
if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1135
{
1136
if (fp->disc->errorf)
1137
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1138
return -1;
1139
}
1140
if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1141
goto badcreate;
1142
1143
/*
1144
* dump the bigrams
1145
*/
1146
1147
sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1148
1149
/*
1150
* encode the massaged paths
1151
*/
1152
1153
while (s = sfgetr(fp->fp, 0, 0))
1154
{
1155
z = strtol(s, &t, 0);
1156
s = t;
1157
if (z < 0 || z > 2 * FF_OFF)
1158
{
1159
sfputc(sp, FF_ESC);
1160
sfputc(sp, (z >> 24));
1161
sfputc(sp, (z >> 16));
1162
sfputc(sp, (z >> 8));
1163
sfputc(sp, z);
1164
}
1165
else
1166
sfputc(sp, z);
1167
while (n = *s++)
1168
{
1169
if (!(m = *s++))
1170
{
1171
sfputc(sp, n);
1172
break;
1173
}
1174
if (d = fp->encode.code[n][m])
1175
sfputc(sp, d);
1176
else
1177
{
1178
sfputc(sp, n);
1179
sfputc(sp, m);
1180
}
1181
}
1182
}
1183
sfclose(fp->fp);
1184
fp->fp = sp;
1185
if (finddone(fp))
1186
goto bad;
1187
break;
1188
case FF_typ:
1189
if (finddone(fp))
1190
goto bad;
1191
if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1192
{
1193
if (fp->disc->errorf)
1194
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1195
remove(fp->encode.temp);
1196
return -1;
1197
}
1198
1199
/*
1200
* commit the output file
1201
*/
1202
1203
if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1204
goto badcreate;
1205
1206
/*
1207
* write the header magic
1208
*/
1209
1210
sfputc(sp, 0);
1211
sfputr(sp, FF_typ_magic, 0);
1212
1213
/*
1214
* write the type table in index order starting with 1
1215
*/
1216
1217
for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1218
sfputr(sp, x->name, 0);
1219
sfputc(sp, 0);
1220
1221
/*
1222
* append the front compressed strings
1223
*/
1224
1225
if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1226
{
1227
sfclose(sp);
1228
if (fp->disc->errorf)
1229
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1230
goto bad;
1231
}
1232
sfclose(fp->fp);
1233
fp->fp = sp;
1234
if (finddone(fp))
1235
goto bad;
1236
remove(fp->encode.temp);
1237
break;
1238
}
1239
return 0;
1240
badcreate:
1241
if (fp->disc->errorf)
1242
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1243
bad:
1244
if (fp->fp)
1245
{
1246
sfclose(fp->fp);
1247
fp->fp = 0;
1248
}
1249
remove(fp->encode.temp);
1250
return -1;
1251
}
1252
1253
/*
1254
* close an open fastfind stream
1255
*/
1256
1257
int
1258
findclose(register Find_t* fp)
1259
{
1260
int n = 0;
1261
1262
if (!fp)
1263
return -1;
1264
if (fp->generate)
1265
{
1266
n = findsync(fp);
1267
if (fp->encode.indexdict)
1268
dtclose(fp->encode.indexdict);
1269
if (fp->encode.namedict)
1270
dtclose(fp->encode.namedict);
1271
}
1272
else
1273
{
1274
if (fp->decode.match)
1275
regfree(&fp->decode.re);
1276
n = 0;
1277
}
1278
if (fp->fp)
1279
sfclose(fp->fp);
1280
vmclose(fp->vm);
1281
return n;
1282
}
1283
1284