Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/misc/magic.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
/*
24
* Glenn Fowler
25
* AT&T Research
26
*
27
* library interface to file
28
*
29
* the sum of the hacks {s5,v10,planix} is _____ than the parts
30
*/
31
32
static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n";
33
34
static const char lib[] = "libast:magic";
35
36
#include <ast.h>
37
#include <ctype.h>
38
#include <ccode.h>
39
#include <dt.h>
40
#include <modex.h>
41
#include <error.h>
42
#include <regex.h>
43
#include <swap.h>
44
45
/*
 * T(m) hands a non-empty message m to ERROR_translate() for this
 * library (lib) and yields an empty message unchanged
 *
 * match(s,p) calls strgrpmatch() with STR_LEFT|STR_RIGHT|STR_ICASE
 * (presumably an anchored, case-insensitive pattern match -- confirm
 * against the strgrpmatch() documentation)
 *
 * macro arguments are parenthesized so that expression arguments
 * (e.g. a ?: expression) expand as intended; m is evaluated more than
 * once so it must be free of side effects
 */

#define T(m)		(*(m)?ERROR_translate(NiL,NiL,lib,(m)):(m))

#define match(s,p)	strgrpmatch((s),(p),NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51
52
typedef struct /* identifier dictionary entry */
53
{
54
const char name[16]; /* identifier name */
55
int value; /* identifier value */
56
Dtlink_t link; /* dictionary link */
57
} Info_t;
58
59
/*
 * edit substitution: singly linked list node holding a compiled
 * "from" pattern
 */

typedef struct Edit
{
	struct Edit*	next;		/* next in list			*/
	regex_t*	from;		/* from pattern			*/
} Edit_t;
64
65
struct Entry;
66
67
/*
 * loop info: state for a magic table loop entry -- the function to
 * call on each pass, the starting offset, the per-pass increment and
 * the running count / saved offset used while the loop is active
 */

typedef struct
{
	struct Entry*	lab;		/* call this function		*/
	int		start;		/* start here			*/
	int		size;		/* increment by this amount	*/
	int		count;		/* dynamic loop count		*/
	int		offset;		/* dynamic offset		*/
} Loop_t;
75
76
typedef struct Entry /* magic file entry */
77
{
78
struct Entry* next; /* next in list */
79
char* expr; /* offset expression */
80
union
81
{
82
unsigned long num;
83
char* str;
84
struct Entry* lab;
85
regex_t* sub;
86
Loop_t* loop;
87
} value; /* comparison value */
88
char* desc; /* file description */
89
char* mime; /* file mime type */
90
unsigned long offset; /* offset in bytes */
91
unsigned long mask; /* mask before compare */
92
char cont; /* continuation operation */
93
char type; /* datum type */
94
char op; /* comparison operation */
95
char nest; /* { or } nesting operation */
96
char swap; /* forced swap order */
97
} Entry_t;
98
99
/*
 * character code classification
 *
 * each code set gets CC_BIT bits in a Cctype_t; Cctype_t is sized so
 * that CC_MAPS such groups fit
 */

#define CC_BIT		5

#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

#define CC_text		0x01
#define CC_control	0x02
#define CC_latin	0x04
#define CC_binary	0x08
#define CC_utf_8	0x10

#define CC_notext	CC_text		/* CC_text is flipped before checking */

#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify a single character value: above 0240 is binary, 0200..0240
 * is latin, below 040 is control unless it is one of 007 011 012 013
 * 015, everything else is text
 */

#define CCTYPE(c)	(((c)>0240)?CC_binary: \
			 ((c)>=0200)?CC_latin: \
			 ((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control: \
			 CC_text)
118
119
/*
 * identifier categories: values stored in the keyword dictionary and
 * used as indices into the per-file identifier counters
 */

#define ID_NONE		0
#define ID_ASM		1
#define ID_C		2
#define ID_COBOL	3
#define ID_COPYBOOK	4
#define ID_CPLUSPLUS	5
#define ID_FORTRAN	6
#define ID_HTML		7
#define ID_INCL1	8
#define ID_INCL2	9
#define ID_INCL3	10
#define ID_MAM1		11
#define ID_MAM2		12
#define ID_MAM3		13
#define ID_NOTEXT	14
#define ID_PL1		15
#define ID_YACC		16

#define ID_MAX		ID_YACC		/* largest ID_* value */
138
139
/*
 * info keyword codes: each selects a stat-derived or name-derived
 * datum instead of file contents
 */

#define INFO_atime	1
#define INFO_blocks	2
#define INFO_ctime	3
#define INFO_fstype	4
#define INFO_gid	5
#define INFO_mode	6
#define INFO_mtime	7
#define INFO_name	8
#define INFO_nlink	9
#define INFO_size	10
#define INFO_uid	11
151
/*
 * private members of the magic handle; defined before <magic.h> is
 * included (presumably spliced into Magic_t by that header -- confirm
 * in magic.h)
 */

#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* discipline		*/ \
	Vmalloc_t*	vm;			/* vmalloc region	*/ \
	Entry_t*	magic;			/* parsed magic table	*/ \
	Entry_t*	magiclast;		/* last entry in magic	*/ \
	char*		mime;			/* MIME type		*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
	char		nbuf[256];		/* !CC_NATIVE data	*/ \
	char		mbuf[64];		/* mime string		*/ \
	char		sbuf[64];		/* type suffix string	*/ \
	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
	int		fbsz;			/* fbuf size		*/ \
	int		fbmx;			/* fbuf max size	*/ \
	int		xbsz;			/* xbuf size		*/ \
	int		swap;			/* swap() operation	*/ \
	unsigned long	flags;			/* disc+open flags	*/ \
	long		xoff;			/* xbuf offset		*/ \
	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
	Sfio_t*		fp;			/* fbuf fp		*/ \
	Sfio_t*		tmp;			/* tmp string		*/ \
	regdisc_t	redisc;			/* regex discipline	*/ \
	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
	Dt_t*		idtab;			/* identifier dict	*/ \
	Dt_t*		infotab;		/* info keyword dict	*/
184
185
#include <magic.h>
186
187
static Info_t dict[] = /* keyword dictionary */
188
{
189
{ "COMMON", ID_FORTRAN },
190
{ "COMPUTE", ID_COBOL },
191
{ "COMP", ID_COPYBOOK },
192
{ "COMPUTATIONAL",ID_COPYBOOK },
193
{ "DCL", ID_PL1 },
194
{ "DEFINED", ID_PL1 },
195
{ "DIMENSION", ID_FORTRAN },
196
{ "DIVISION", ID_COBOL },
197
{ "FILLER", ID_COPYBOOK },
198
{ "FIXED", ID_PL1 },
199
{ "FUNCTION", ID_FORTRAN },
200
{ "HTML", ID_HTML },
201
{ "INTEGER", ID_FORTRAN },
202
{ "MAIN", ID_PL1 },
203
{ "OPTIONS", ID_PL1 },
204
{ "PERFORM", ID_COBOL },
205
{ "PIC", ID_COPYBOOK },
206
{ "REAL", ID_FORTRAN },
207
{ "REDEFINES", ID_COPYBOOK },
208
{ "S9", ID_COPYBOOK },
209
{ "SECTION", ID_COBOL },
210
{ "SELECT", ID_COBOL },
211
{ "SUBROUTINE", ID_FORTRAN },
212
{ "TEXT", ID_ASM },
213
{ "VALUE", ID_COPYBOOK },
214
{ "attr", ID_MAM3 },
215
{ "binary", ID_YACC },
216
{ "block", ID_FORTRAN },
217
{ "bss", ID_ASM },
218
{ "byte", ID_ASM },
219
{ "char", ID_C },
220
{ "class", ID_CPLUSPLUS },
221
{ "clr", ID_NOTEXT },
222
{ "comm", ID_ASM },
223
{ "common", ID_FORTRAN },
224
{ "data", ID_ASM },
225
{ "dimension", ID_FORTRAN },
226
{ "done", ID_MAM2 },
227
{ "double", ID_C },
228
{ "even", ID_ASM },
229
{ "exec", ID_MAM3 },
230
{ "extern", ID_C },
231
{ "float", ID_C },
232
{ "function", ID_FORTRAN },
233
{ "globl", ID_ASM },
234
{ "h", ID_INCL3 },
235
{ "html", ID_HTML },
236
{ "include", ID_INCL1 },
237
{ "int", ID_C },
238
{ "integer", ID_FORTRAN },
239
{ "jmp", ID_NOTEXT },
240
{ "left", ID_YACC },
241
{ "libc", ID_INCL2 },
242
{ "long", ID_C },
243
{ "make", ID_MAM1 },
244
{ "mov", ID_NOTEXT },
245
{ "private", ID_CPLUSPLUS },
246
{ "public", ID_CPLUSPLUS },
247
{ "real", ID_FORTRAN },
248
{ "register", ID_C },
249
{ "right", ID_YACC },
250
{ "sfio", ID_INCL2 },
251
{ "static", ID_C },
252
{ "stdio", ID_INCL2 },
253
{ "struct", ID_C },
254
{ "subroutine", ID_FORTRAN },
255
{ "sys", ID_NOTEXT },
256
{ "term", ID_YACC },
257
{ "text", ID_ASM },
258
{ "tst", ID_NOTEXT },
259
{ "type", ID_YACC },
260
{ "typedef", ID_C },
261
{ "u", ID_INCL2 },
262
{ "union", ID_YACC },
263
{ "void", ID_C },
264
};
265
266
static Info_t info[] =
267
{
268
{ "atime", INFO_atime },
269
{ "blocks", INFO_blocks },
270
{ "ctime", INFO_ctime },
271
{ "fstype", INFO_fstype },
272
{ "gid", INFO_gid },
273
{ "mode", INFO_mode },
274
{ "mtime", INFO_mtime },
275
{ "name", INFO_name },
276
{ "nlink", INFO_nlink },
277
{ "size", INFO_size },
278
{ "uid", INFO_uid },
279
};
280
281
/*
282
* return pointer to data at offset off and size siz
283
*/
284
285
static char*
286
getdata(register Magic_t* mp, register long off, register int siz)
287
{
288
register long n;
289
290
if (off < 0)
291
return 0;
292
if (off + siz <= mp->fbsz)
293
return mp->fbuf + off;
294
if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295
{
296
if (off + siz > mp->fbmx)
297
return 0;
298
n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299
if (sfseek(mp->fp, n, SEEK_SET) != n)
300
return 0;
301
if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302
{
303
mp->xoff = 0;
304
mp->xbsz = 0;
305
return 0;
306
}
307
mp->xbuf[mp->xbsz] = 0;
308
mp->xoff = n;
309
if (off + siz > mp->xoff + mp->xbsz)
310
return 0;
311
}
312
return mp->xbuf + off - mp->xoff;
313
}
314
315
/*
316
* @... evaluator for strexpr()
317
*/
318
319
static long
320
indirect(const char* cs, char** e, void* handle)
321
{
322
register char* s = (char*)cs;
323
register Magic_t* mp = (Magic_t*)handle;
324
register long n = 0;
325
register char* p;
326
327
if (s)
328
{
329
if (*s == '@')
330
{
331
n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332
switch (*(s = *e))
333
{
334
case 'b':
335
case 'B':
336
s++;
337
if (p = getdata(mp, n, 1))
338
n = *(unsigned char*)p;
339
else
340
s = (char*)cs;
341
break;
342
case 'h':
343
case 'H':
344
s++;
345
if (p = getdata(mp, n, 2))
346
n = swapget(mp->swap, p, 2);
347
else
348
s = (char*)cs;
349
break;
350
case 'q':
351
case 'Q':
352
s++;
353
if (p = getdata(mp, n, 8))
354
n = swapget(mp->swap, p, 8);
355
else
356
s = (char*)cs;
357
break;
358
default:
359
if (isalnum(*s))
360
s++;
361
if (p = getdata(mp, n, 4))
362
n = swapget(mp->swap, p, 4);
363
else
364
s = (char*)cs;
365
break;
366
}
367
}
368
*e = s;
369
}
370
else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371
(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372
return n;
373
}
374
375
/*
376
* emit regex error message
377
*/
378
379
static void
380
regmessage(Magic_t* mp, regex_t* re, int code)
381
{
382
char buf[128];
383
384
if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385
{
386
regerror(code, re, buf, sizeof(buf));
387
(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388
}
389
}
390
391
/*
392
* decompose vcodex(3) method composition
393
*/
394
395
static char*
396
vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397
{
398
unsigned char* map;
399
const char* o;
400
int c;
401
int n;
402
int i;
403
int a;
404
405
map = CCMAP(CC_ASCII, CC_NATIVE);
406
a = 0;
407
i = 1;
408
for (;;)
409
{
410
if (i)
411
i = 0;
412
else
413
*b++ = '^';
414
if (m < (x - 1) && !*(m + 1))
415
{
416
/*
417
* obsolete indices
418
*/
419
420
if (!a)
421
{
422
a = 1;
423
o = "old, ";
424
while (b < e && (c = *o++))
425
*b++ = c;
426
}
427
switch (*m)
428
{
429
case 0: o = "delta"; break;
430
case 1: o = "huffman"; break;
431
case 2: o = "huffgroup"; break;
432
case 3: o = "arith"; break;
433
case 4: o = "bwt"; break;
434
case 5: o = "rle"; break;
435
case 6: o = "mtf"; break;
436
case 7: o = "transpose"; break;
437
case 8: o = "table"; break;
438
case 9: o = "huffpart"; break;
439
case 50: o = "map"; break;
440
case 100: o = "recfm"; break;
441
case 101: o = "ss7"; break;
442
default: o = "UNKNOWN"; break;
443
}
444
m += 2;
445
while (b < e && (c = *o++))
446
*b++ = c;
447
}
448
else
449
while (b < e && m < x && (c = *m++))
450
{
451
if (map)
452
c = map[c];
453
*b++ = c;
454
}
455
if (b >= e)
456
break;
457
n = 0;
458
while (m < x)
459
{
460
n = (n<<7) | (*m & 0x7f);
461
if (!(*m++ & 0x80))
462
break;
463
}
464
if (n >= (x - m))
465
break;
466
m += n;
467
}
468
return b;
469
}
470
471
/*
472
* check for magic table match in buf
473
*/
474
475
static char*
476
ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
477
{
478
register Entry_t* ep;
479
register char* p;
480
register char* b;
481
register int level = 0;
482
int call = -1;
483
int all = 0;
484
int c;
485
int str;
486
char* q;
487
char* t;
488
char* cur;
489
char* base = 0;
490
unsigned long num;
491
unsigned long mask;
492
regmatch_t matches[10];
493
494
mp->swap = 0;
495
b = mp->msg[0] = cur = buf;
496
mp->mime = mp->cap[0] = 0;
497
mp->keep[0] = 0;
498
for (ep = mp->magic; ep; ep = ep->next)
499
{
500
fun:
501
if (ep->nest == '{')
502
{
503
if (++level >= MAXNEST)
504
{
505
call = -1;
506
level = 0;
507
mp->keep[0] = 0;
508
b = mp->msg[0];
509
mp->mime = mp->cap[0];
510
continue;
511
}
512
mp->keep[level] = mp->keep[level - 1] != 0;
513
mp->msg[level] = b;
514
mp->cap[level] = mp->mime;
515
}
516
switch (ep->cont)
517
{
518
case '#':
519
if (mp->keep[level] && b > cur)
520
{
521
if ((mp->flags & MAGIC_ALL) && b < (end - 3))
522
{
523
all = 1;
524
*b++ = '\n';
525
cur = b;
526
continue;
527
}
528
*b = 0;
529
return buf;
530
}
531
mp->swap = 0;
532
b = mp->msg[0] = cur;
533
mp->mime = mp->cap[0] = 0;
534
if (ep->type == ' ')
535
continue;
536
break;
537
case '$':
538
if (mp->keep[level] && call < (MAXNEST - 1))
539
{
540
mp->ret[++call] = ep;
541
ep = ep->value.lab;
542
goto fun;
543
}
544
continue;
545
case ':':
546
ep = mp->ret[call--];
547
if (ep->op == 'l')
548
goto fun;
549
continue;
550
case '|':
551
if (mp->keep[level] > 1)
552
goto checknest;
553
/*FALLTHROUGH*/
554
default:
555
if (!mp->keep[level])
556
{
557
b = mp->msg[level];
558
mp->mime = mp->cap[level];
559
goto checknest;
560
}
561
break;
562
}
563
p = "";
564
num = 0;
565
if (!ep->expr)
566
num = ep->offset + off;
567
else
568
switch (ep->offset)
569
{
570
case 0:
571
num = strexpr(ep->expr, NiL, indirect, mp) + off;
572
break;
573
case INFO_atime:
574
num = st->st_atime;
575
ep->type = 'D';
576
break;
577
case INFO_blocks:
578
num = iblocks(st);
579
ep->type = 'N';
580
break;
581
case INFO_ctime:
582
num = st->st_ctime;
583
ep->type = 'D';
584
break;
585
case INFO_fstype:
586
p = fmtfs(st);
587
ep->type = toupper(ep->type);
588
break;
589
case INFO_gid:
590
if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
591
{
592
p = fmtgid(st->st_gid);
593
ep->type = toupper(ep->type);
594
}
595
else
596
{
597
num = st->st_gid;
598
ep->type = 'N';
599
}
600
break;
601
case INFO_mode:
602
if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
603
{
604
p = fmtmode(st->st_mode, 0);
605
ep->type = toupper(ep->type);
606
}
607
else
608
{
609
num = modex(st->st_mode);
610
ep->type = 'N';
611
}
612
break;
613
case INFO_mtime:
614
num = st->st_ctime;
615
ep->type = 'D';
616
break;
617
case INFO_name:
618
if (!base)
619
{
620
if (base = strrchr(file, '/'))
621
base++;
622
else
623
base = (char*)file;
624
}
625
p = base;
626
ep->type = toupper(ep->type);
627
break;
628
case INFO_nlink:
629
num = st->st_nlink;
630
ep->type = 'N';
631
break;
632
case INFO_size:
633
num = st->st_size;
634
ep->type = 'N';
635
break;
636
case INFO_uid:
637
if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
638
{
639
p = fmtuid(st->st_uid);
640
ep->type = toupper(ep->type);
641
}
642
else
643
{
644
num = st->st_uid;
645
ep->type = 'N';
646
}
647
break;
648
}
649
switch (ep->type)
650
{
651
652
case 'b':
653
if (!(p = getdata(mp, num, 1)))
654
goto next;
655
num = *(unsigned char*)p;
656
break;
657
658
case 'h':
659
if (!(p = getdata(mp, num, 2)))
660
goto next;
661
num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
662
break;
663
664
case 'd':
665
case 'l':
666
case 'v':
667
if (!(p = getdata(mp, num, 4)))
668
goto next;
669
num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
670
break;
671
672
case 'q':
673
if (!(p = getdata(mp, num, 8)))
674
goto next;
675
num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
676
break;
677
678
case 'e':
679
if (!(p = getdata(mp, num, 0)))
680
goto next;
681
/*FALLTHROUGH*/
682
case 'E':
683
if (!ep->value.sub)
684
goto next;
685
if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
686
{
687
c = mp->fbsz;
688
if (c >= sizeof(mp->nbuf))
689
c = sizeof(mp->nbuf) - 1;
690
p = (char*)memcpy(mp->nbuf, p, c);
691
p[c] = 0;
692
ccmapstr(mp->x2n, p, c);
693
if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
694
{
695
if (c != REG_NOMATCH)
696
regmessage(mp, ep->value.sub, c);
697
goto next;
698
}
699
}
700
p = ep->value.sub->re_sub->re_buf;
701
q = T(ep->desc);
702
t = *q ? q : p;
703
if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
704
*b++ = ' ';
705
b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
706
if (ep->mime)
707
mp->mime = ep->mime;
708
goto checknest;
709
710
case 's':
711
if (!(p = getdata(mp, num, ep->mask)))
712
goto next;
713
goto checkstr;
714
case 'm':
715
if (!(p = getdata(mp, num, 0)))
716
goto next;
717
/*FALLTHROUGH*/
718
case 'M':
719
case 'S':
720
checkstr:
721
for (;;)
722
{
723
if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
724
break;
725
if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
726
break;
727
if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
728
goto next;
729
p = (char*)memcpy(mp->nbuf, p, ep->mask);
730
p[ep->mask] = 0;
731
ccmapstr(mp->x2n, p, ep->mask);
732
}
733
q = T(ep->desc);
734
if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
735
*b++ = ' ';
736
for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
737
*t = 0;
738
b += sfsprintf(b, end - b, q + (*q == '\b'), p);
739
*t = c;
740
if (ep->mime)
741
mp->mime = ep->mime;
742
goto checknest;
743
744
}
745
if (mask = ep->mask)
746
num &= mask;
747
switch (ep->op)
748
{
749
750
case '=':
751
case '@':
752
if (num == ep->value.num)
753
break;
754
if (ep->cont != '#')
755
goto next;
756
if (!mask)
757
mask = ~mask;
758
if (ep->type == 'h')
759
{
760
if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
761
{
762
if (!(mp->swap & (mp->swap + 1)))
763
mp->swap = 7;
764
goto swapped;
765
}
766
}
767
else if (ep->type == 'l')
768
{
769
for (c = 1; c < 4; c++)
770
if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
771
{
772
if (!(mp->swap & (mp->swap + 1)))
773
mp->swap = 7;
774
goto swapped;
775
}
776
}
777
else if (ep->type == 'q')
778
{
779
for (c = 1; c < 8; c++)
780
if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
781
goto swapped;
782
}
783
goto next;
784
785
case '!':
786
if (num != ep->value.num)
787
break;
788
goto next;
789
790
case '^':
791
if (num ^ ep->value.num)
792
break;
793
goto next;
794
795
case '>':
796
if (num > ep->value.num)
797
break;
798
goto next;
799
800
case '<':
801
if (num < ep->value.num)
802
break;
803
goto next;
804
805
case 'l':
806
if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
807
{
808
if (!ep->value.loop->count)
809
{
810
ep->value.loop->count = num;
811
ep->value.loop->offset = off;
812
off = ep->value.loop->start;
813
}
814
else if (!--ep->value.loop->count)
815
{
816
off = ep->value.loop->offset;
817
goto next;
818
}
819
else
820
off += ep->value.loop->size;
821
mp->ret[++call] = ep;
822
ep = ep->value.loop->lab;
823
goto fun;
824
}
825
goto next;
826
827
case 'm':
828
c = mp->swap;
829
t = ckmagic(mp, file, b + (b > cur), end, st, num);
830
mp->swap = c;
831
if (t)
832
{
833
if (b > cur && b < end)
834
*b = ' ';
835
b += strlen(b);
836
}
837
else if (ep->cont == '&')
838
goto next;
839
break;
840
841
case 'r':
842
#if _UWIN
843
{
844
char* e;
845
Sfio_t* rp;
846
Sfio_t* gp;
847
848
if (!(t = strrchr(file, '.')))
849
goto next;
850
sfprintf(mp->tmp, "/reg/classes_root/%s", t);
851
if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
852
goto next;
853
*ep->desc = 0;
854
*ep->mime = 0;
855
gp = 0;
856
while (t = sfgetr(rp, '\n', 1))
857
{
858
if (strneq(t, "Content Type=", 13))
859
{
860
ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
861
strcpy(ep->mime, t + 13);
862
if (gp)
863
break;
864
}
865
else
866
{
867
sfprintf(mp->tmp, "/reg/classes_root/%s", t);
868
if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
869
{
870
ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
871
strcpy(ep->desc, t);
872
if (*ep->mime)
873
break;
874
}
875
}
876
}
877
sfclose(rp);
878
if (!gp)
879
goto next;
880
if (!*ep->mime)
881
{
882
t = T(ep->desc);
883
if (!strncasecmp(t, "microsoft", 9))
884
t += 9;
885
while (isspace(*t))
886
t++;
887
e = "application/x-ms-";
888
ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
889
e = strcopy(ep->mime, e);
890
while ((c = *t++) && c != '.' && c != ' ')
891
*e++ = isupper(c) ? tolower(c) : c;
892
*e = 0;
893
}
894
while (t = sfgetr(gp, '\n', 1))
895
if (*t && !streq(t, "\"\""))
896
{
897
ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
898
strcpy(ep->desc, t);
899
break;
900
}
901
sfclose(gp);
902
if (!*ep->desc)
903
goto next;
904
if (!t)
905
for (t = T(ep->desc); *t; t++)
906
if (*t == '.')
907
*t = ' ';
908
if (!mp->keep[level])
909
mp->keep[level] = 2;
910
mp->mime = ep->mime;
911
break;
912
}
913
#else
914
if (ep->cont == '#' && !mp->keep[level])
915
mp->keep[level] = 1;
916
goto next;
917
#endif
918
919
case 'v':
920
if (!(p = getdata(mp, num, 4)))
921
goto next;
922
c = 0;
923
do
924
{
925
num++;
926
c = (c<<7) | (*p & 0x7f);
927
} while (*p++ & 0x80);
928
if (!(p = getdata(mp, num, c)))
929
goto next;
930
if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
931
{
932
*b++ = ',';
933
*b++ = ' ';
934
}
935
b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
936
goto checknest;
937
938
}
939
swapped:
940
q = T(ep->desc);
941
if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
942
*b++ = ' ';
943
if (*q == '\b')
944
q++;
945
str = 0;
946
for (t = q; *t; t++)
947
if (*t == '%' && (c = *(t + 1)))
948
{
949
if (c == '%')
950
t++;
951
else
952
while (c && c != '%')
953
{
954
if (c == 's')
955
{
956
str = 1;
957
break;
958
}
959
else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X')
960
goto format;
961
t++;
962
c = *(t + 1);
963
}
964
}
965
format:
966
if (!str)
967
b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
968
else if (ep->type == 'd' || ep->type == 'D')
969
b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
970
else if (ep->type == 'v')
971
b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
972
else
973
b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
974
if (ep->mime && *ep->mime)
975
mp->mime = ep->mime;
976
checknest:
977
if (ep->nest == '}')
978
{
979
if (!mp->keep[level])
980
{
981
b = mp->msg[level];
982
mp->mime = mp->cap[level];
983
}
984
else if (level > 0)
985
mp->keep[level - 1] = mp->keep[level];
986
if (--level < 0)
987
{
988
level = 0;
989
mp->keep[0] = 0;
990
}
991
}
992
continue;
993
next:
994
if (ep->cont == '&')
995
mp->keep[level] = 0;
996
goto checknest;
997
}
998
if (all && b-- || mp->keep[level] && b > cur)
999
{
1000
*b = 0;
1001
return buf;
1002
}
1003
return 0;
1004
}
1005
1006
/*
1007
* check english language stats
1008
*/
1009
1010
static int
1011
ckenglish(register Magic_t* mp, int pun, int badpun)
1012
{
1013
register char* s;
1014
register int vowl = 0;
1015
register int freq = 0;
1016
register int rare = 0;
1017
1018
if (5 * badpun > pun)
1019
return 0;
1020
if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
1021
return 0;
1022
if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
1023
return 0;
1024
for (s = "aeiou"; *s; s++)
1025
vowl += mp->count[toupper(*s)] + mp->count[*s];
1026
for (s = "etaion"; *s; s++)
1027
freq += mp->count[toupper(*s)] + mp->count[*s];
1028
for (s = "vjkqxz"; *s; s++)
1029
rare += mp->count[toupper(*s)] + mp->count[*s];
1030
return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
1031
}
1032
1033
/*
1034
* check programming language stats
1035
*/
1036
1037
static char*
1038
cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
1039
{
1040
register int c;
1041
register unsigned char* b;
1042
register unsigned char* e;
1043
register int q;
1044
register char* s;
1045
char* t;
1046
char* base;
1047
char* suff;
1048
char* t1;
1049
char* t2;
1050
char* t3;
1051
int n;
1052
int badpun;
1053
int code;
1054
int pun;
1055
Cctype_t flags;
1056
Info_t* ip;
1057
1058
b = (unsigned char*)mp->fbuf;
1059
e = b + mp->fbsz;
1060
memzero(mp->count, sizeof(mp->count));
1061
memzero(mp->multi, sizeof(mp->multi));
1062
memzero(mp->identifier, sizeof(mp->identifier));
1063
1064
/*
1065
* check character coding
1066
*/
1067
1068
flags = 0;
1069
while (b < e)
1070
flags |= mp->cctype[*b++];
1071
b = (unsigned char*)mp->fbuf;
1072
code = 0;
1073
q = CC_ASCII;
1074
n = CC_MASK;
1075
for (c = 0; c < CC_MAPS; c++)
1076
{
1077
flags ^= CC_text;
1078
if ((flags & CC_MASK) < n)
1079
{
1080
n = flags & CC_MASK;
1081
q = c;
1082
}
1083
flags >>= CC_BIT;
1084
}
1085
flags = n;
1086
if (!(flags & (CC_binary|CC_notext)))
1087
{
1088
if (q != CC_NATIVE)
1089
{
1090
code = q;
1091
ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1092
}
1093
if (b[0] == '#' && b[1] == '!')
1094
{
1095
for (b += 2; b < e && isspace(*b); b++);
1096
for (s = (char*)b; b < e && isprint(*b); b++);
1097
c = *b;
1098
*b = 0;
1099
if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1100
{
1101
if (t = strrchr(s, '/'))
1102
s = t + 1;
1103
for (t = s; *t; t++)
1104
if (isspace(*t))
1105
{
1106
*t = 0;
1107
break;
1108
}
1109
sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1110
mp->mime = mp->mbuf;
1111
if (match(s, "*sh"))
1112
{
1113
t1 = T("command");
1114
if (streq(s, "sh"))
1115
*s = 0;
1116
else
1117
{
1118
*b++ = ' ';
1119
*b = 0;
1120
}
1121
}
1122
else
1123
{
1124
t1 = T("interpreter");
1125
*b++ = ' ';
1126
*b = 0;
1127
}
1128
sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1129
s = mp->sbuf;
1130
goto qualify;
1131
}
1132
*b = c;
1133
b = (unsigned char*)mp->fbuf;
1134
}
1135
badpun = 0;
1136
pun = 0;
1137
q = 0;
1138
s = 0;
1139
t = 0;
1140
while (b < e)
1141
{
1142
c = *b++;
1143
mp->count[c]++;
1144
if (c == q && (q != '*' || *b == '/' && b++))
1145
{
1146
mp->multi[q]++;
1147
q = 0;
1148
}
1149
else if (c == '\\')
1150
{
1151
s = 0;
1152
b++;
1153
}
1154
else if (!q)
1155
{
1156
if (isalpha(c) || c == '_')
1157
{
1158
if (!s)
1159
s = (char*)b - 1;
1160
}
1161
else if (!isdigit(c))
1162
{
1163
if (s)
1164
{
1165
if (s > mp->fbuf)
1166
switch (*(s - 1))
1167
{
1168
case ':':
1169
if (*b == ':')
1170
mp->multi[':']++;
1171
break;
1172
case '.':
1173
if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1174
mp->multi['.']++;
1175
break;
1176
case '\n':
1177
case '\\':
1178
if (*b == '{')
1179
t = (char*)b + 1;
1180
break;
1181
case '{':
1182
if (s == t && *b == '}')
1183
mp->multi['X']++;
1184
break;
1185
}
1186
if (!mp->idtab)
1187
{
1188
if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset))
1189
for (q = 0; q < elementsof(dict); q++)
1190
dtinsert(mp->idtab, &dict[q]);
1191
else if (mp->disc->errorf)
1192
(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1193
q = 0;
1194
}
1195
if (mp->idtab)
1196
{
1197
*(b - 1) = 0;
1198
if (ip = (Info_t*)dtmatch(mp->idtab, s))
1199
mp->identifier[ip->value]++;
1200
*(b - 1) = c;
1201
}
1202
s = 0;
1203
}
1204
switch (c)
1205
{
1206
case '\t':
1207
if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1208
mp->multi['\t']++;
1209
break;
1210
case '"':
1211
case '\'':
1212
q = c;
1213
break;
1214
case '/':
1215
if (*b == '*')
1216
q = *b++;
1217
else if (*b == '/')
1218
q = '\n';
1219
break;
1220
case '$':
1221
if (*b == '(' && *(b + 1) != ' ')
1222
mp->multi['$']++;
1223
break;
1224
case '{':
1225
case '}':
1226
case '[':
1227
case ']':
1228
case '(':
1229
mp->multi[c]++;
1230
break;
1231
case ')':
1232
mp->multi[c]++;
1233
goto punctuation;
1234
case ':':
1235
if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1236
mp->multi[':']++;
1237
goto punctuation;
1238
case '.':
1239
case ',':
1240
case '%':
1241
case ';':
1242
case '?':
1243
punctuation:
1244
pun++;
1245
if (*b != ' ' && *b != '\n')
1246
badpun++;
1247
break;
1248
}
1249
}
1250
}
1251
}
1252
}
1253
else
1254
while (b < e)
1255
mp->count[*b++]++;
1256
base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1257
suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1258
if (!flags)
1259
{
1260
if (match(suff, "*sh|bat|cmd"))
1261
goto id_sh;
1262
if (match(base, "*@(mkfile)"))
1263
goto id_mk;
1264
if (match(base, "*@(makefile|.mk)"))
1265
goto id_make;
1266
if (match(base, "*@(mamfile|.mam)"))
1267
goto id_mam;
1268
if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1269
goto id_c;
1270
if (match(suff, "f"))
1271
goto id_fortran;
1272
if (match(suff, "htm+(l)"))
1273
goto id_html;
1274
if (match(suff, "cpy"))
1275
goto id_copybook;
1276
if (match(suff, "cob|cbl|cb2"))
1277
goto id_cobol;
1278
if (match(suff, "pl[1i]"))
1279
goto id_pl1;
1280
if (match(suff, "tex"))
1281
goto id_tex;
1282
if (match(suff, "asm|s"))
1283
goto id_asm;
1284
if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1285
{
1286
id_sh:
1287
s = T("command script");
1288
mp->mime = "application/sh";
1289
goto qualify;
1290
}
1291
if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1292
{
1293
s = T("mail message");
1294
mp->mime = "message/rfc822";
1295
goto qualify;
1296
}
1297
if (match(base, "*@(mkfile)"))
1298
{
1299
id_mk:
1300
s = "mkfile";
1301
mp->mime = "application/mk";
1302
goto qualify;
1303
}
1304
if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1305
{
1306
id_make:
1307
s = "makefile";
1308
mp->mime = "application/make";
1309
goto qualify;
1310
}
1311
if (mp->multi['.'] >= 3)
1312
{
1313
s = T("nroff input");
1314
mp->mime = "application/x-troff";
1315
goto qualify;
1316
}
1317
if (mp->multi['X'] >= 3)
1318
{
1319
s = T("TeX input");
1320
mp->mime = "application/x-tex";
1321
goto qualify;
1322
}
1323
if (mp->fbsz < SF_BUFSIZE &&
1324
(mp->multi['('] == mp->multi[')'] &&
1325
mp->multi['{'] == mp->multi['}'] &&
1326
mp->multi['['] == mp->multi[']']) ||
1327
mp->fbsz >= SF_BUFSIZE &&
1328
(mp->multi['('] >= mp->multi[')'] &&
1329
mp->multi['{'] >= mp->multi['}'] &&
1330
mp->multi['['] >= mp->multi[']']))
1331
{
1332
c = mp->identifier[ID_INCL1];
1333
if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1334
mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1335
mp->count['='] >= 20 && mp->count[';'] >= 20)
1336
{
1337
id_c:
1338
t1 = "";
1339
t2 = "c ";
1340
t3 = T("program");
1341
switch (*suff)
1342
{
1343
case 'c':
1344
case 'C':
1345
mp->mime = "application/x-cc";
1346
break;
1347
case 'l':
1348
case 'L':
1349
t1 = "lex ";
1350
mp->mime = "application/x-lex";
1351
break;
1352
default:
1353
t3 = T("header");
1354
if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1355
{
1356
mp->mime = "application/x-cc";
1357
break;
1358
}
1359
/*FALLTHROUGH*/
1360
case 'y':
1361
case 'Y':
1362
t1 = "yacc ";
1363
mp->mime = "application/x-yacc";
1364
break;
1365
}
1366
if (mp->identifier[ID_CPLUSPLUS] >= 3)
1367
{
1368
t2 = "c++ ";
1369
mp->mime = "application/x-c++";
1370
}
1371
sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1372
s = mp->sbuf;
1373
goto qualify;
1374
}
1375
}
1376
if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1377
(mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1378
mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1379
{
1380
id_mam:
1381
s = T("mam program");
1382
mp->mime = "application/x-mam";
1383
goto qualify;
1384
}
1385
if (mp->identifier[ID_FORTRAN] >= 8)
1386
{
1387
id_fortran:
1388
s = T("fortran program");
1389
mp->mime = "application/x-fortran";
1390
goto qualify;
1391
}
1392
if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1393
{
1394
id_html:
1395
s = T("html input");
1396
mp->mime = "text/html";
1397
goto qualify;
1398
}
1399
if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1400
{
1401
id_copybook:
1402
s = T("cobol copybook");
1403
mp->mime = "application/x-cobol";
1404
goto qualify;
1405
}
1406
if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1407
{
1408
id_cobol:
1409
s = T("cobol program");
1410
mp->mime = "application/x-cobol";
1411
goto qualify;
1412
}
1413
if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1414
{
1415
id_pl1:
1416
s = T("pl1 program");
1417
mp->mime = "application/x-pl1";
1418
goto qualify;
1419
}
1420
if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1421
{
1422
id_tex:
1423
s = T("TeX input");
1424
mp->mime = "text/tex";
1425
goto qualify;
1426
}
1427
if (mp->identifier[ID_ASM] >= 4)
1428
{
1429
id_asm:
1430
s = T("as program");
1431
mp->mime = "application/x-as";
1432
goto qualify;
1433
}
1434
if (ckenglish(mp, pun, badpun))
1435
{
1436
s = T("english text");
1437
mp->mime = "text/plain";
1438
goto qualify;
1439
}
1440
}
1441
else if (streq(base, "core"))
1442
{
1443
mp->mime = "x-system/core";
1444
return T("core dump");
1445
}
1446
if (flags & (CC_binary|CC_notext))
1447
{
1448
b = (unsigned char*)mp->fbuf;
1449
e = b + mp->fbsz;
1450
n = 0;
1451
for (;;)
1452
{
1453
c = *b++;
1454
q = 0;
1455
while (c & 0x80)
1456
{
1457
c <<= 1;
1458
q++;
1459
}
1460
switch (q)
1461
{
1462
case 4:
1463
if (b < e && (*b++ & 0xc0) != 0x80)
1464
break;
1465
case 3:
1466
if (b < e && (*b++ & 0xc0) != 0x80)
1467
break;
1468
case 2:
1469
if (b < e && (*b++ & 0xc0) != 0x80)
1470
break;
1471
n = 1;
1472
case 0:
1473
if (b >= e)
1474
{
1475
if (n)
1476
{
1477
flags &= ~(CC_binary|CC_notext);
1478
flags |= CC_utf_8;
1479
}
1480
break;
1481
}
1482
continue;
1483
}
1484
break;
1485
}
1486
}
1487
if (flags & (CC_binary|CC_notext))
1488
{
1489
unsigned long d = 0;
1490
1491
if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1492
{
1493
/*
1494
* compression/encryption via standard deviation
1495
*/
1496
1497
1498
for (c = 0; c < UCHAR_MAX; c++)
1499
{
1500
pun = mp->count[c] - q;
1501
d += pun * pun;
1502
}
1503
d /= mp->fbsz;
1504
}
1505
if (d <= 0)
1506
s = T("binary");
1507
else if (d < 4)
1508
s = T("encrypted");
1509
else if (d < 16)
1510
s = T("packed");
1511
else if (d < 64)
1512
s = T("compressed");
1513
else if (d < 256)
1514
s = T("delta");
1515
else
1516
s = T("data");
1517
mp->mime = "application/octet-stream";
1518
return s;
1519
}
1520
mp->mime = "text/plain";
1521
if (flags & CC_utf_8)
1522
s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1523
else if (flags & CC_latin)
1524
s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1525
else
1526
s = (flags & CC_control) ? T("text with control characters") : T("text");
1527
qualify:
1528
if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1529
{
1530
t = "dos ";
1531
mp->mime = "text/dos";
1532
}
1533
else
1534
t = "";
1535
if (code)
1536
{
1537
if (code == CC_ASCII)
1538
sfsprintf(buf, end - buf, "ascii %s%s", t, s);
1539
else
1540
{
1541
sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
1542
mp->mime = "text/ebcdic";
1543
}
1544
s = buf;
1545
}
1546
else if (*t)
1547
{
1548
sfsprintf(buf, end - buf, "%s%s", t, s);
1549
s = buf;
1550
}
1551
return s;
1552
}
1553
1554
/*
1555
* return the basic magic string for file,st in buf,size
1556
*/
1557
1558
static char*
1559
type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
1560
{
1561
register char* s;
1562
register char* t;
1563
1564
mp->mime = 0;
1565
if (!S_ISREG(st->st_mode))
1566
{
1567
if (S_ISDIR(st->st_mode))
1568
{
1569
mp->mime = "x-system/dir";
1570
return T("directory");
1571
}
1572
if (S_ISLNK(st->st_mode))
1573
{
1574
mp->mime = "x-system/lnk";
1575
s = buf;
1576
s += sfsprintf(s, end - s, T("symbolic link to "));
1577
if (pathgetlink(file, s, end - s) < 0)
1578
return T("cannot read symbolic link text");
1579
return buf;
1580
}
1581
if (S_ISBLK(st->st_mode))
1582
{
1583
mp->mime = "x-system/blk";
1584
sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1585
return buf;
1586
}
1587
if (S_ISCHR(st->st_mode))
1588
{
1589
mp->mime = "x-system/chr";
1590
sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
1591
return buf;
1592
}
1593
if (S_ISFIFO(st->st_mode))
1594
{
1595
mp->mime = "x-system/fifo";
1596
return "fifo";
1597
}
1598
#ifdef S_ISSOCK
1599
if (S_ISSOCK(st->st_mode))
1600
{
1601
mp->mime = "x-system/sock";
1602
return "socket";
1603
}
1604
#endif
1605
}
1606
if (!(mp->fbmx = st->st_size))
1607
s = T("empty");
1608
else if (!mp->fp)
1609
s = T("cannot read");
1610
else
1611
{
1612
mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1613
if (mp->fbsz < 0)
1614
s = fmterror(errno);
1615
else if (mp->fbsz == 0)
1616
s = T("empty");
1617
else
1618
{
1619
mp->fbuf[mp->fbsz] = 0;
1620
mp->xoff = 0;
1621
mp->xbsz = 0;
1622
if (!(s = ckmagic(mp, file, buf, end, st, 0)))
1623
s = cklang(mp, file, buf, end, st);
1624
}
1625
}
1626
if (!mp->mime)
1627
mp->mime = "application/unknown";
1628
else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1629
{
1630
register char* b;
1631
register char* be;
1632
register char* m;
1633
register char* me;
1634
1635
b = mp->mime;
1636
me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1637
while (m < me && b < t)
1638
*m++ = *b++;
1639
b = t = s;
1640
for (;;)
1641
{
1642
if (!(be = strchr(t, ' ')))
1643
{
1644
be = b + strlen(b);
1645
break;
1646
}
1647
if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1648
break;
1649
b = t;
1650
t = be + 1;
1651
}
1652
while (m < me && b < be)
1653
if ((*m++ = *b++) == ' ')
1654
*(m - 1) = '-';
1655
*m = 0;
1656
}
1657
return s;
1658
}
1659
1660
/*
1661
* low level for magicload()
1662
*/
1663
1664
static int
1665
load(register Magic_t* mp, char* file, register Sfio_t* fp)
1666
{
1667
register Entry_t* ep;
1668
register char* p;
1669
register char* p2;
1670
char* p3;
1671
char* next;
1672
int n;
1673
int lge;
1674
int lev;
1675
int ent;
1676
int old;
1677
int cont;
1678
Info_t* ip;
1679
Entry_t* ret;
1680
Entry_t* first;
1681
Entry_t* last = 0;
1682
Entry_t* fun['z' - 'a' + 1];
1683
1684
memzero(fun, sizeof(fun));
1685
cont = '$';
1686
ent = 0;
1687
lev = 0;
1688
old = 0;
1689
ret = 0;
1690
error_info.file = file;
1691
error_info.line = 0;
1692
first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1693
while (p = sfgetr(fp, '\n', 1))
1694
{
1695
error_info.line++;
1696
for (; isspace(*p); p++);
1697
1698
/*
1699
* nesting
1700
*/
1701
1702
switch (*p)
1703
{
1704
case 0:
1705
case '#':
1706
cont = '#';
1707
continue;
1708
case '{':
1709
if (++lev < MAXNEST)
1710
ep->nest = *p;
1711
else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1712
(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1713
continue;
1714
case '}':
1715
if (!last || lev <= 0)
1716
{
1717
if (mp->disc->errorf)
1718
(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1719
}
1720
else if (lev-- == ent)
1721
{
1722
ent = 0;
1723
ep->cont = ':';
1724
ep->offset = ret->offset;
1725
ep->nest = ' ';
1726
ep->type = ' ';
1727
ep->op = ' ';
1728
ep->desc = "[RETURN]";
1729
last = ep;
1730
ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1731
ret = 0;
1732
}
1733
else
1734
last->nest = *p;
1735
continue;
1736
default:
1737
if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1738
{
1739
n = *p++;
1740
if (n >= 'a' && n <= 'z')
1741
n -= 'a';
1742
else
1743
{
1744
if (mp->disc->errorf)
1745
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1746
n = 0;
1747
}
1748
if (ret && mp->disc->errorf)
1749
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1750
if (*p == '{')
1751
{
1752
ent = ++lev;
1753
ret = ep;
1754
ep->desc = "[FUNCTION]";
1755
}
1756
else
1757
{
1758
if (*(p + 1) != ')' && mp->disc->errorf)
1759
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1760
ep->desc = "[CALL]";
1761
}
1762
ep->cont = cont;
1763
ep->offset = n;
1764
ep->nest = ' ';
1765
ep->type = ' ';
1766
ep->op = ' ';
1767
last = ep;
1768
ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1769
if (ret)
1770
fun[n] = last->value.lab = ep;
1771
else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1772
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1773
continue;
1774
}
1775
if (!ep->nest)
1776
ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1777
break;
1778
}
1779
1780
/*
1781
* continuation
1782
*/
1783
1784
cont = '$';
1785
switch (*p)
1786
{
1787
case '>':
1788
old = 1;
1789
if (*(p + 1) == *p)
1790
{
1791
/*
1792
* old style nesting push
1793
*/
1794
1795
p++;
1796
old = 2;
1797
if (!lev && last)
1798
{
1799
lev = 1;
1800
last->nest = '{';
1801
if (last->cont == '>')
1802
last->cont = '&';
1803
ep->nest = '1';
1804
}
1805
}
1806
/*FALLTHROUGH*/
1807
case '+':
1808
case '&':
1809
case '|':
1810
ep->cont = *p++;
1811
break;
1812
default:
1813
if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1814
(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1815
/*FALLTHROUGH*/
1816
case '*':
1817
case '0': case '1': case '2': case '3': case '4':
1818
case '5': case '6': case '7': case '8': case '9':
1819
ep->cont = (lev > 0) ? '&' : '#';
1820
break;
1821
}
1822
switch (old)
1823
{
1824
case 1:
1825
old = 0;
1826
if (lev)
1827
{
1828
/*
1829
* old style nesting pop
1830
*/
1831
1832
lev = 0;
1833
if (last)
1834
last->nest = '}';
1835
ep->nest = ' ';
1836
if (ep->cont == '&')
1837
ep->cont = '#';
1838
}
1839
break;
1840
case 2:
1841
old = 1;
1842
break;
1843
}
1844
if (isdigit(*p))
1845
{
1846
/*
1847
* absolute offset
1848
*/
1849
1850
ep->offset = strton(p, &next, NiL, 0);
1851
p2 = next;
1852
}
1853
else
1854
{
1855
for (p2 = p; *p2 && !isspace(*p2); p2++);
1856
if (!*p2)
1857
{
1858
if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1859
(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1860
continue;
1861
}
1862
1863
/*
1864
* offset expression
1865
*/
1866
1867
*p2++ = 0;
1868
ep->expr = vmstrdup(mp->vm, p);
1869
if (isalpha(*p))
1870
ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1871
else if (*p == '(' && ep->cont == '>')
1872
{
1873
/*
1874
* convert old style indirection to @
1875
*/
1876
1877
p = ep->expr + 1;
1878
for (;;)
1879
{
1880
switch (*p++)
1881
{
1882
case 0:
1883
case '@':
1884
case '(':
1885
break;
1886
case ')':
1887
break;
1888
default:
1889
continue;
1890
}
1891
break;
1892
}
1893
if (*--p == ')')
1894
{
1895
*p = 0;
1896
*ep->expr = '@';
1897
}
1898
}
1899
}
1900
for (; isspace(*p2); p2++);
1901
for (p = p2; *p2 && !isspace(*p2); p2++);
1902
if (!*p2)
1903
{
1904
if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1905
(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1906
continue;
1907
}
1908
*p2++ = 0;
1909
1910
/*
1911
* type
1912
*/
1913
1914
if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1915
{
1916
ep->swap = ~(*p == 'l' ? 7 : 0);
1917
p += 2;
1918
}
1919
if (*p == 's')
1920
{
1921
if (*(p + 1) == 'h')
1922
ep->type = 'h';
1923
else
1924
ep->type = 's';
1925
}
1926
else if (*p == 'a')
1927
ep->type = 's';
1928
else
1929
ep->type = *p;
1930
if (p = strchr(p, '&'))
1931
{
1932
/*
1933
* old style mask
1934
*/
1935
1936
ep->mask = strton(++p, NiL, NiL, 0);
1937
}
1938
for (; isspace(*p2); p2++);
1939
if (ep->mask)
1940
*--p2 = '=';
1941
1942
/*
1943
* comparison operation
1944
*/
1945
1946
p = p2;
1947
if (p2 = strchr(p, '\t'))
1948
*p2++ = 0;
1949
else
1950
{
1951
int qe = 0;
1952
int qn = 0;
1953
1954
/*
1955
* assume balanced {}[]()\\""'' field
1956
*/
1957
1958
for (p2 = p;;)
1959
{
1960
switch (n = *p2++)
1961
{
1962
case 0:
1963
break;
1964
case '{':
1965
if (!qe)
1966
qe = '}';
1967
if (qe == '}')
1968
qn++;
1969
continue;
1970
case '(':
1971
if (!qe)
1972
qe = ')';
1973
if (qe == ')')
1974
qn++;
1975
continue;
1976
case '[':
1977
if (!qe)
1978
qe = ']';
1979
if (qe == ']')
1980
qn++;
1981
continue;
1982
case '}':
1983
case ')':
1984
case ']':
1985
if (qe == n && qn > 0)
1986
qn--;
1987
continue;
1988
case '"':
1989
case '\'':
1990
if (!qe)
1991
qe = n;
1992
else if (qe == n)
1993
qe = 0;
1994
continue;
1995
case '\\':
1996
if (*p2)
1997
p2++;
1998
continue;
1999
default:
2000
if (!qe && isspace(n))
2001
break;
2002
continue;
2003
}
2004
if (n)
2005
*(p2 - 1) = 0;
2006
else
2007
p2--;
2008
break;
2009
}
2010
}
2011
lge = 0;
2012
if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
2013
ep->op = '=';
2014
else
2015
{
2016
if (*p == '&')
2017
{
2018
ep->mask = strton(++p, &next, NiL, 0);
2019
p = next;
2020
}
2021
switch (*p)
2022
{
2023
case '=':
2024
case '>':
2025
case '<':
2026
case '*':
2027
ep->op = *p++;
2028
if (*p == '=')
2029
{
2030
p++;
2031
switch (ep->op)
2032
{
2033
case '>':
2034
lge = -1;
2035
break;
2036
case '<':
2037
lge = 1;
2038
break;
2039
}
2040
}
2041
break;
2042
case '!':
2043
case '@':
2044
ep->op = *p++;
2045
if (*p == '=')
2046
p++;
2047
break;
2048
case 'x':
2049
p++;
2050
ep->op = '*';
2051
break;
2052
default:
2053
ep->op = '=';
2054
if (ep->mask)
2055
ep->value.num = ep->mask;
2056
break;
2057
}
2058
}
2059
if (ep->op != '*' && !ep->value.num)
2060
{
2061
if (ep->type == 'e')
2062
{
2063
if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2064
{
2065
ep->value.sub->re_disc = &mp->redisc;
2066
if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2067
{
2068
p += ep->value.sub->re_npat;
2069
if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2070
p += ep->value.sub->re_npat;
2071
}
2072
if (n)
2073
{
2074
regmessage(mp, ep->value.sub, n);
2075
ep->value.sub = 0;
2076
}
2077
else if (*p && mp->disc->errorf)
2078
(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2079
}
2080
}
2081
else if (ep->type == 'm')
2082
{
2083
ep->mask = stresc(p) + 1;
2084
ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2085
memcpy(ep->value.str, p, ep->mask);
2086
if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2087
ep->value.str[ep->mask - 1] = '*';
2088
}
2089
else if (ep->type == 's')
2090
{
2091
ep->mask = stresc(p);
2092
ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2093
memcpy(ep->value.str, p, ep->mask);
2094
}
2095
else if (*p == '\'')
2096
{
2097
stresc(p);
2098
ep->value.num = *(unsigned char*)(p + 1) + lge;
2099
}
2100
else if (strmatch(p, "+([a-z])\\(*\\)"))
2101
{
2102
char* t;
2103
2104
t = p;
2105
ep->type = 'V';
2106
ep->op = *p;
2107
while (*p && *p++ != '(');
2108
switch (ep->op)
2109
{
2110
case 'l':
2111
n = *p++;
2112
if (n < 'a' || n > 'z')
2113
{
2114
if (mp->disc->errorf)
2115
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2116
}
2117
else if (!fun[n -= 'a'])
2118
{
2119
if (mp->disc->errorf)
2120
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2121
}
2122
else
2123
{
2124
ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2125
ep->value.loop->lab = fun[n];
2126
while (*p && *p++ != ',');
2127
ep->value.loop->start = strton(p, &t, NiL, 0);
2128
while (*t && *t++ != ',');
2129
ep->value.loop->size = strton(t, &t, NiL, 0);
2130
}
2131
break;
2132
case 'm':
2133
case 'r':
2134
ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2135
ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2136
break;
2137
case 'v':
2138
break;
2139
default:
2140
if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2141
(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2142
break;
2143
}
2144
}
2145
else
2146
{
2147
ep->value.num = strton(p, NiL, NiL, 0) + lge;
2148
if (ep->op == '@')
2149
ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2150
}
2151
}
2152
2153
/*
2154
* file description
2155
*/
2156
2157
if (p2)
2158
{
2159
for (; isspace(*p2); p2++);
2160
if (p = strchr(p2, '\t'))
2161
{
2162
/*
2163
* check for message catalog index
2164
*/
2165
2166
*p++ = 0;
2167
if (isalpha(*p2))
2168
{
2169
for (p3 = p2; isalnum(*p3); p3++);
2170
if (*p3++ == ':')
2171
{
2172
for (; isdigit(*p3); p3++);
2173
if (!*p3)
2174
{
2175
for (p2 = p; isspace(*p2); p2++);
2176
if (p = strchr(p2, '\t'))
2177
*p++ = 0;
2178
}
2179
}
2180
}
2181
}
2182
stresc(p2);
2183
ep->desc = vmstrdup(mp->vm, p2);
2184
if (p)
2185
{
2186
for (; isspace(*p); p++);
2187
if (*p)
2188
ep->mime = vmstrdup(mp->vm, p);
2189
}
2190
}
2191
else
2192
ep->desc = "";
2193
2194
/*
2195
* get next entry
2196
*/
2197
2198
last = ep;
2199
ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2200
}
2201
if (last)
2202
{
2203
last->next = 0;
2204
if (mp->magiclast)
2205
mp->magiclast->next = first;
2206
else
2207
mp->magic = first;
2208
mp->magiclast = last;
2209
}
2210
vmfree(mp->vm, ep);
2211
if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2212
{
2213
if (lev < 0)
2214
(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2215
else if (lev > 0)
2216
(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2217
if (ret)
2218
(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2219
}
2220
error_info.file = 0;
2221
error_info.line = 0;
2222
return 0;
2223
}
2224
2225
/*
2226
* load a magic file into mp
2227
*/
2228
2229
int
2230
magicload(register Magic_t* mp, const char* file, unsigned long flags)
2231
{
2232
register char* s;
2233
register char* e;
2234
register char* t;
2235
int n;
2236
int found;
2237
int list;
2238
Sfio_t* fp;
2239
2240
mp->flags = mp->disc->flags | flags;
2241
found = 0;
2242
if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2243
{
2244
if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2245
s = MAGIC_FILE;
2246
}
2247
for (;;)
2248
{
2249
if (!list)
2250
e = 0;
2251
else if (e = strchr(s, ':'))
2252
{
2253
/*
2254
* ok, so ~ won't work for the last list element
2255
* we do it for MAGIC_FILES_ENV anyway
2256
*/
2257
2258
if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2259
{
2260
sfputr(mp->tmp, t, -1);
2261
s += n - 1;
2262
}
2263
sfwrite(mp->tmp, s, e - s);
2264
if (!(s = sfstruse(mp->tmp)))
2265
goto nospace;
2266
}
2267
if (!*s || streq(s, "-"))
2268
s = MAGIC_FILE;
2269
if (!(fp = sfopen(NiL, s, "r")))
2270
{
2271
if (list)
2272
{
2273
if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
2274
{
2275
strcpy(mp->fbuf, s);
2276
sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2277
if (!(s = sfstruse(mp->tmp)))
2278
goto nospace;
2279
if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
2280
goto next;
2281
}
2282
if (!(fp = sfopen(NiL, t, "r")))
2283
goto next;
2284
}
2285
else
2286
{
2287
if (mp->disc->errorf)
2288
(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2289
return -1;
2290
}
2291
}
2292
found = 1;
2293
n = load(mp, s, fp);
2294
sfclose(fp);
2295
if (n && !list)
2296
return -1;
2297
next:
2298
if (!e)
2299
break;
2300
s = e + 1;
2301
}
2302
if (!found)
2303
{
2304
if (mp->flags & MAGIC_VERBOSE)
2305
{
2306
if (mp->disc->errorf)
2307
(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2308
}
2309
return -1;
2310
}
2311
return 0;
2312
nospace:
2313
if (mp->disc->errorf)
2314
(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2315
return -1;
2316
}
2317
2318
/*
2319
* open a magic session
2320
*/
2321
2322
Magic_t*
2323
magicopen(Magicdisc_t* disc)
2324
{
2325
register Magic_t* mp;
2326
register int i;
2327
register int n;
2328
register int f;
2329
register int c;
2330
register Vmalloc_t* vm;
2331
unsigned char* map[CC_MAPS + 1];
2332
2333
if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2334
return 0;
2335
if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2336
{
2337
vmclose(vm);
2338
return 0;
2339
}
2340
mp->id = lib;
2341
mp->disc = disc;
2342
mp->vm = vm;
2343
mp->flags = disc->flags;
2344
mp->redisc.re_version = REG_VERSION;
2345
mp->redisc.re_flags = REG_NOFREE;
2346
mp->redisc.re_errorf = (regerror_t)disc->errorf;
2347
mp->redisc.re_resizef = (regresize_t)vmgetmem;
2348
mp->redisc.re_resizehandle = (void*)mp->vm;
2349
mp->dtdisc.key = offsetof(Info_t, name);
2350
mp->dtdisc.link = offsetof(Info_t, link);
2351
if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset)))
2352
goto bad;
2353
for (n = 0; n < elementsof(info); n++)
2354
dtinsert(mp->infotab, &info[n]);
2355
for (i = 0; i < CC_MAPS; i++)
2356
map[i] = ccmap(i, CC_ASCII);
2357
mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2358
for (n = 0; n <= UCHAR_MAX; n++)
2359
{
2360
f = 0;
2361
i = CC_MAPS;
2362
while (--i >= 0)
2363
{
2364
c = ccmapchr(map[i], n);
2365
f = (f << CC_BIT) | CCTYPE(c);
2366
}
2367
mp->cctype[n] = f;
2368
}
2369
return mp;
2370
bad:
2371
magicclose(mp);
2372
return 0;
2373
}
2374
2375
/*
2376
* close a magicopen() session
2377
*/
2378
2379
int
2380
magicclose(register Magic_t* mp)
2381
{
2382
if (!mp)
2383
return -1;
2384
if (mp->tmp)
2385
sfstrclose(mp->tmp);
2386
if (mp->vm)
2387
vmclose(mp->vm);
2388
return 0;
2389
}
2390
2391
/*
2392
* return the magic string for file with optional stat info st
2393
*/
2394
2395
char*
2396
magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2397
{
2398
off_t off;
2399
char* s;
2400
2401
mp->flags = mp->disc->flags;
2402
mp->mime = 0;
2403
if (!st)
2404
s = T("cannot stat");
2405
else
2406
{
2407
if (mp->fp = fp)
2408
off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2409
s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
2410
if (mp->fp)
2411
sfseek(mp->fp, off, SEEK_SET);
2412
if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
2413
{
2414
if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2415
sfprintf(mp->tmp, "%s ", T("short"));
2416
sfprintf(mp->tmp, "%s", s);
2417
if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2418
sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2419
if (st->st_mode & S_ISUID)
2420
sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2421
if (st->st_mode & S_ISGID)
2422
sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2423
if (st->st_mode & S_ISVTX)
2424
sfprintf(mp->tmp, ", sticky");
2425
if (!(s = sfstruse(mp->tmp)))
2426
s = T("out of space");
2427
}
2428
}
2429
if (mp->flags & MAGIC_MIME)
2430
s = mp->mime;
2431
if (!s)
2432
s = T("error");
2433
return s;
2434
}
2435
2436
/*
2437
* list the magic table in mp on sp
2438
*/
2439
2440
int
2441
magiclist(register Magic_t* mp, register Sfio_t* sp)
2442
{
2443
register Entry_t* ep = mp->magic;
2444
register Entry_t* rp = 0;
2445
2446
mp->flags = mp->disc->flags;
2447
sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2448
while (ep)
2449
{
2450
sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2451
if (ep->expr)
2452
sfprintf(sp, "%s", ep->expr);
2453
else
2454
sfprintf(sp, "%ld", ep->offset);
2455
sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2456
switch (ep->type)
2457
{
2458
case 'm':
2459
case 's':
2460
sfputr(sp, fmtesc(ep->value.str), -1);
2461
break;
2462
case 'V':
2463
switch (ep->op)
2464
{
2465
case 'l':
2466
sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2467
break;
2468
case 'v':
2469
sfprintf(sp, "vcodex()");
2470
break;
2471
default:
2472
sfprintf(sp, "%p", ep->value.str);
2473
break;
2474
}
2475
break;
2476
default:
2477
sfprintf(sp, "%lo", ep->value.num);
2478
break;
2479
}
2480
sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2481
if (ep->cont == '$' && !ep->value.lab->mask)
2482
{
2483
rp = ep;
2484
ep = ep->value.lab;
2485
}
2486
else
2487
{
2488
if (ep->cont == ':')
2489
{
2490
ep = rp;
2491
ep->value.lab->mask = 1;
2492
}
2493
ep = ep->next;
2494
}
2495
}
2496
return 0;
2497
}
2498
2499