Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/pzip/rectify.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1998-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <gsf@research.att.com> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
22
/*
23
* induce fixed length record groups from data
24
*/
25
26
static const char usage[] =
27
"[-?\n@(#)$Id: rectify (AT&T Research) 1999-03-22 $\n]"
28
USAGE_LICENSE
29
"[+NAME?rectify - induce fixed length record groups from data]"
30
"[+DESCRIPTION?\brectify\b induces fixed length record groups from input data"
31
" by sampling and comparing character frequencies. The standard input is"
32
" read if \a-\a or no files are specified.]"
33
34
"[c:context?List \acontext\a records at the beginning and end of"
35
" record groups larger that 3*\acontext\a.]#[context]"
36
"[d:description?Specify a structured dump description file. Each line of"
37
" this file describes the size and content of a contiguous portion"
38
" of the input file. The description is applied separately to each"
39
" input file. Comments and optional labels in the following"
40
" descriptions are listed with the \b--verbose\b option. Supported"
41
" descriptions are:]:[file]{"
42
" [+c comment?comment]"
43
" [+d size [label]]?\asize\a bytes of data with optional label]"
44
" [+i size [label]]?ignore \asize\a bytes of data]"
45
" [+r size count [label]]?\acount\a records of length \asize\a]"
46
" [+t count?Match \acount\a records against the \bT\b record"
47
" table. \acount\a=0 continues until no record type"
48
" match is found.]"
49
" [+z size [label]]?a string with length determined by a"
50
" \asize\a byte binary integer]"
51
" [+T idlen id size unit [offset]]?Defines a sized record"
52
" table entry.]{"
53
" [+idlen?type identifier length, must be"
54
" <= 4 bytes]"
55
" [+id?type identifier, starting at record offset 0]"
56
" [+size?default record size]"
57
" [+unit?if > 0 then the record is variable length and"
58
" the size is the byte at \aoffset\a]"
59
" [+offset?if \aunit\a > 0 then this byte multiplied by"
60
" \aunit\a is the size of variable length data"
61
" appended to the record]"
62
" }"
63
"}"
64
"[f:format?Byte output \bprintf\b(3) format.]:[format:=02x]"
65
"[g!:group?Group output in 4's.]"
66
"[m:min?Minimum record length to consider.]#[min:=8]"
67
"[n:count?List the top \acount\a candidate record lengths.]#[count:=16]"
68
"[o:offset?Start description listing at \aoffset\a.]#[offset:=0]"
69
"[r:run?List runs at least as long as \arun\a.]#[run]"
70
"[v:verbose?Dump description labels with data.]"
71
72
"\n"
73
"\n[ file ... ]\n"
74
"\n"
75
"[+SEE ALSO?\bpin\b(1), \bpop\b(1)]"
76
;
77
78
#include <ast.h>
79
#include <error.h>
80
#include <tok.h>
81
82
/*
 * statistics for one candidate record length
 * rectify() keeps one of these per byte repeat distance
 */

typedef struct Item_s
{
	unsigned long	index;		/* repeat distance (array slot before sorting)	*/
	unsigned long	offset;		/* input position of the most recent hit	*/
	unsigned long	start;		/* input position where the current run began	*/
	unsigned long	count;		/* total hits at this distance			*/
	unsigned long	run;		/* length of the run in progress, 0 if none	*/
} Item_t;
90
91
/*
 * one sized record table entry, filled in from a description file T line
 */

typedef struct
{
	int		len;		/* type identifier length in bytes		*/
	unsigned long	id;		/* type identifier value			*/
	int		size;		/* default record size				*/
	int		unit;		/* if != 0 the record has variable length data	*/
	int		offset;		/* record offset of the variable length byte	*/
} Type_t;
99
100
typedef struct
101
{
102
Sfoff_t offset;
103
unsigned long count;
104
} Loop_t;
105
106
static struct
107
{
108
Type_t type[4 * 1024];
109
Item_t mod[4 * 1024];
110
unsigned long hit[UCHAR_MAX + 1];
111
Sfoff_t offset;
112
char* format1;
113
char* format4;
114
unsigned long context;
115
unsigned long count;
116
unsigned long min;
117
unsigned long run;
118
int group;
119
int types;
120
int typelen;
121
int typelast;
122
} state;
123
124
/*
125
* order items by count hi to lo
126
*/
127
128
static int
129
bycount(const void* va, const void* vb)
130
{
131
register Item_t* a = (Item_t*)va;
132
register Item_t* b = (Item_t*)vb;
133
134
if (a->count < b->count)
135
return 1;
136
if (a->count > b->count)
137
return -1;
138
if (a < b)
139
return 1;
140
if (a > b)
141
return -1;
142
return 0;
143
}
144
145
/*
146
* rectify fp open for read on file
147
*/
148
149
static void
150
rectify(register Sfio_t* fp, char* file, int verbose)
151
{
152
register unsigned char* s;
153
register Item_t* p;
154
register unsigned long* q;
155
register unsigned long offset;
156
register unsigned long i;
157
unsigned long n;
158
unsigned long cur;
159
unsigned long dif;
160
unsigned long max;
161
162
memset(state.hit, 0, sizeof(state.hit));
163
memset(state.mod, 0, sizeof(state.mod));
164
for (i = 0; i < elementsof(state.mod); i++)
165
state.mod[i].index = i;
166
max = 0;
167
offset = 0;
168
while (s = sfreserve(fp, SF_UNBOUND, 0))
169
{
170
n = sfvalue(fp);
171
for (i = 0; i < n; i++)
172
{
173
cur = offset + i;
174
q = state.hit + s[i];
175
dif = cur - *q;
176
*q = cur;
177
if (dif < elementsof(state.mod))
178
{
179
p = state.mod + dif;
180
if (dif > max)
181
max = dif;
182
p->count++;
183
if ((cur - p->offset) <= dif)
184
{
185
if (!p->run++)
186
p->start = cur;
187
}
188
else if (p->run)
189
{
190
if (state.run && p->run >= state.run && p->index >= state.min)
191
sfprintf(sfstdout, "run %7lu %7lu %7lu\n", p->index, p->run, p->start);
192
p->run = 0;
193
}
194
p->offset = cur;
195
}
196
}
197
offset += n;
198
}
199
qsort(state.mod, elementsof(state.mod), sizeof(state.mod[0]), bycount);
200
n = 0;
201
for (i = 0; i < elementsof(state.mod) && n < state.count; i++)
202
if (state.mod[i].index >= state.min)
203
{
204
n++;
205
sfprintf(sfstdout, "rec %7lu %7lu %7lu\n", state.mod[i].index, state.mod[i].count, state.mod[i].offset);
206
}
207
}
208
209
/*
210
* dump size n buffer b to op in 4 hex byte chunks
211
*/
212
213
static void
214
dump(Sfio_t* op, register unsigned char* b, size_t n)
215
{
216
register unsigned char* e = b + n / 4 * 4;
217
register unsigned char* x;
218
219
x = state.group ? (b + n) : b;
220
while (b < e)
221
{
222
sfprintf(op, state.format4, b[0], b[1], b[2], b[3]);
223
if ((b += 4) < x)
224
sfputc(op, ' ');
225
}
226
while (b < x)
227
sfprintf(op, state.format1, *b++);
228
sfputc(op, '\n');
229
}
230
231
/*
 * parse an unsigned number (strtoul base 0) at *b, skipping blanks and
 * tabs on either side, and leave *b at the next token
 * a missing number is a fatal error
 */

static unsigned long
number(char** b)
{
	char*		beg = *b;
	char*		end;
	unsigned long	val;

	while (*beg == ' ' || *beg == '\t')
		beg++;
	val = strtoul(beg, &end, 0);
	if (end == beg)
		error(3, "numeric argument expected");
	while (*end == ' ' || *end == '\t')
		end++;
	*b = end;
	return val;
}
249
250
/*
251
* dump fp according to dp
252
*/
253
254
static void
255
describe(register Sfio_t* dp, char* desc, register Sfio_t* fp, char* file, int verbose)
256
{
257
register unsigned char* p;
258
unsigned char* e;
259
long size;
260
long count;
261
unsigned long context;
262
Sfoff_t offset;
263
Sfoff_t skip;
264
int nest;
265
int op;
266
char* s;
267
char* t;
268
Loop_t loop[64];
269
unsigned long id[5];
270
271
error_info.file = desc;
272
error_info.line = 0;
273
offset = 0;
274
nest = -1;
275
while (s = sfgetr(dp, '\n', 0))
276
{
277
error_info.line++;
278
for (t = s + sfvalue(dp) - 1; *s == ' ' || *s == '\t'; s++);
279
for (op = *s; *s != ' ' && *s != '\t' && *s != '\n'; s++);
280
for (; *s == ' ' || *s == '\t'; s++);
281
switch (op)
282
{
283
case '#':
284
case '\n':
285
break;
286
case '{':
287
if (++nest >= elementsof(loop))
288
error(3, "%c: nesting too deep -- %d max", op, elementsof(loop));
289
count = number(&s);
290
loop[nest].offset = sfseek(dp, (Sfoff_t)0, SEEK_CUR);
291
loop[nest].count = count;
292
if (verbose && offset >= state.offset)
293
sfprintf(sfstdout, "=== %I*d === loop %d %lu %I*d === %-.*s\n", sizeof(offset), offset, nest, loop[nest].count, sizeof(loop[nest].offset), loop[nest].offset, t - s, s);
294
break;
295
case '}':
296
if (nest < 0)
297
error(3, "%c: no matching {", op); /*balance}*/
298
if (loop[nest].count-- <= 1)
299
nest--;
300
else if (sfseek(dp, loop[nest].offset, SEEK_SET) < 0)
301
error(ERROR_SYSTEM|3, "loop seek error to %I*d", sizeof(loop[nest].offset), loop[nest].offset);
302
else if (verbose && offset >= state.offset)
303
sfprintf(sfstdout, "=== %I*d === loop %d %lu %I*d === %-.*s\n", sizeof(offset), offset, nest, loop[nest].count, sizeof(loop[nest].offset), loop[nest].offset, t - s, s);
304
break;
305
case 'c':
306
if (verbose && offset >= state.offset)
307
sfprintf(sfstdout, "=== %I*d === %-.*s\n", sizeof(offset), offset, t - s, s);
308
break;
309
case 'd':
310
size = number(&s);
311
if (offset >= state.offset)
312
{
313
if (verbose)
314
sfprintf(sfstdout, "=== %I*d === %ld === %-.*s\n", sizeof(offset), offset, size, t - s, s);
315
if (!(p = sfreserve(fp, size, 0)))
316
error(ERROR_SYSTEM|3, "%s: cannot read %ld bytes at %I*d", file, size, sizeof(offset), offset);
317
dump(sfstdout, p, size);
318
}
319
else if (sfseek(fp, (Sfoff_t)size, SEEK_CUR) < 0)
320
error(ERROR_SYSTEM|3, "%s: cannot seek %ld bytes at %I*d", file, size, sizeof(offset), offset);
321
offset += size;
322
break;
323
case 'i':
324
size = number(&s);
325
if (verbose && offset >= state.offset)
326
sfprintf(sfstdout, "=== %I*d === %ld === %-.*s\n", sizeof(offset), offset, size, t - s, s);
327
if (sfseek(fp, (Sfoff_t)size, SEEK_CUR) < 0)
328
error(ERROR_SYSTEM|3, "%s: cannot seek %ld bytes at %I*d", file, size, sizeof(offset), offset);
329
offset += size;
330
break;
331
case 'r':
332
size = number(&s);
333
count = number(&s);
334
if (offset < state.offset)
335
{
336
skip = count * size;
337
if ((offset + skip) > state.offset)
338
{
339
skip = (state.offset - offset) / size;
340
count -= skip;
341
skip *= size;
342
if (sfseek(fp, skip, SEEK_CUR) < 0)
343
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
344
offset += skip;
345
}
346
}
347
if (offset >= state.offset)
348
{
349
if (verbose)
350
sfprintf(sfstdout, "=== %I*d === %ld * %ld === %-.*s\n", sizeof(offset), offset, size, count, t - s, s);
351
if (state.context && count > (3 * state.context))
352
{
353
skip = (count - 2 * state.context) * size;
354
count = state.context;
355
while (count-- > 0)
356
{
357
if (!(p = sfreserve(fp, size, 0)))
358
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", size, sizeof(offset), offset);
359
offset += size;
360
dump(sfstdout, p, size);
361
}
362
sfprintf(sfstdout, " . . .\n");
363
if (sfseek(fp, skip, SEEK_CUR) < 0)
364
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
365
offset += skip;
366
count = state.context;
367
}
368
while (count-- > 0)
369
{
370
if (!(p = sfreserve(fp, size, 0)))
371
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", size, sizeof(offset), offset);
372
offset += size;
373
dump(sfstdout, p, size);
374
}
375
}
376
else
377
{
378
skip = count * size;
379
if (sfseek(fp, skip, SEEK_CUR) < 0)
380
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
381
offset += skip;
382
}
383
break;
384
case 't':
385
if (!state.typelen)
386
error(3, "no sized record types defined");
387
context = 0;
388
count = number(&s);
389
do
390
{
391
if (!(p = sfreserve(fp, state.typelen, SF_LOCKR)))
392
break;
393
switch (state.typelen)
394
{
395
case 4: id[4] = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
396
case 3: id[3] = (p[0] << 16) | (p[1] << 8) | p[2];
397
case 2: id[2] = (p[0] << 8) | p[1];
398
case 1: id[1] = p[0];
399
}
400
sfread(fp, p, 0);
401
if (state.type[state.typelast].id != id[state.type[state.typelast].len])
402
{
403
for (state.typelast = 0; state.typelast < state.types && state.type[state.typelast].id != id[state.type[state.typelast].len]; state.typelast++);
404
if (state.typelast >= state.types)
405
{
406
if (verbose)
407
sfprintf(sfstdout, "=== %I*d === %0*x === type not found\n", sizeof(offset), offset, 2 * state.typelen, id[state.typelen]);
408
break;
409
}
410
if (verbose && offset >= state.offset)
411
sfprintf(sfstdout, "=== %I*d === %0*x === type\n", sizeof(offset), offset, 2 * state.type[state.typelast].len, id[state.type[state.typelast].len]);
412
context = 0;
413
}
414
size = state.type[state.typelast].size;
415
if (!(p = sfreserve(fp, size, state.type[state.typelast].unit ? SF_LOCKR : 0)))
416
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", size, sizeof(offset), offset);
417
if (state.type[state.typelast].unit)
418
{
419
size += p[state.type[state.typelast].offset] * state.type[state.typelast].unit;
420
sfread(fp, p, 0);
421
if (!(p = sfreserve(fp, size, 0)))
422
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", size, sizeof(offset), offset);
423
}
424
if (offset >= state.offset)
425
{
426
if (!state.context)
427
dump(sfstdout, p, size);
428
else if (context++ < state.context)
429
dump(sfstdout, p, size);
430
else if (context == state.context + 1)
431
sfprintf(sfstdout, " . . .\n");
432
}
433
offset += size;
434
} while (!count || --count);
435
break;
436
case 'z':
437
size = number(&s);
438
if (!(p = sfreserve(fp, size, 0)))
439
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", size, sizeof(offset), offset);
440
count = 0;
441
e = p + size;
442
while (p < e)
443
count = (count << 8) | *p++;
444
if (offset >= state.offset)
445
{
446
if (verbose)
447
sfprintf(sfstdout, "=== %I*d === %ld === %-.*s\n", sizeof(offset), offset, size, t - s, s);
448
offset += size;
449
if (!(p = sfreserve(fp, count, 0)))
450
error(ERROR_SYSTEM|3, "cannot read %ld bytes at %I*d", count, sizeof(offset), offset);
451
sfprintf(sfstdout, "\"%s\"\n", fmtnesq((char*)p, "\"", count));
452
}
453
else
454
{
455
offset += 2;
456
if (sfseek(fp, (Sfoff_t)count, SEEK_CUR) < 0)
457
error(ERROR_SYSTEM|3, "%s: cannot seek %ld bytes at %I*d", file, count, sizeof(offset), offset);
458
}
459
offset += count;
460
break;
461
case 'T':
462
if (state.types >= elementsof(state.type))
463
error(3, "too many types -- %d max", elementsof(state.type));
464
if ((state.type[state.types].len = number(&s)) > state.typelen)
465
state.typelen = state.type[state.types].len;
466
if (state.type[state.types].len >= elementsof(id))
467
error(3, "type id length must be <= %d", elementsof(id) - 1);
468
state.type[state.types].id = number(&s);
469
state.type[state.types].size = number(&s);
470
if (state.type[state.types].unit = number(&s))
471
state.type[state.types].offset = number(&s);
472
state.types++;
473
break;
474
default:
475
error(2, "%c: unknown description op", op);
476
break;
477
}
478
}
479
if (verbose && offset >= state.offset)
480
sfprintf(sfstdout, "=== %I*d === EOF\n", sizeof(offset), offset);
481
error_info.file = 0;
482
error_info.line = 0;
483
if (skip = sfseek(fp, (Sfoff_t)0, SEEK_END) - offset)
484
error(1, "%s: %I*d bytes ignored at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
485
}
486
487
int
488
main(int argc, char** argv)
489
{
490
register char* file;
491
int n;
492
Sfio_t* fp;
493
Sfio_t* dp;
494
495
char* desc = 0;
496
char* format = "02x";
497
int verbose = 0;
498
499
error_info.id = "rectify";
500
state.count = 16;
501
state.group = 1;
502
state.min = 8;
503
state.run = 0;
504
for (;;)
505
{
506
switch (optget(argv, usage))
507
{
508
case 'c':
509
state.context = opt_info.num;
510
continue;
511
case 'd':
512
if (desc)
513
error(2, "%s: only one description file allowed", opt_info.arg);
514
else
515
desc = opt_info.arg;
516
continue;
517
case 'f':
518
format = opt_info.arg;
519
continue;
520
case 'g':
521
state.group = opt_info.num;
522
continue;
523
case 'm':
524
state.min = opt_info.num;
525
continue;
526
case 'n':
527
state.count = opt_info.num;
528
continue;
529
case 'o':
530
state.offset = opt_info.num;
531
continue;
532
case 'r':
533
state.run = opt_info.num;
534
continue;
535
case 'v':
536
verbose = opt_info.num;
537
continue;
538
case '?':
539
error(ERROR_USAGE|4, "%s", opt_info.arg);
540
continue;
541
case ':':
542
error(2, "%s", opt_info.arg);
543
continue;
544
}
545
break;
546
}
547
argv += opt_info.index;
548
if (error_info.errors)
549
error(ERROR_USAGE|4, "%s", optusage(NiL));
550
n = (strlen(format) + 1) * 4 + 1;
551
if (!(state.format4 = newof(0, char, n, 0)))
552
error(ERROR_SYSTEM|3, "out of space [format]");
553
sfsprintf(state.format4, n, "%%%s%%%s%%%s%%%s", format, format, format, format);
554
state.format1 = state.format4 + 3 * (strlen(format) + 1);
555
if (desc && !(dp = sfopen(NiL, desc, "r")))
556
error(ERROR_SYSTEM|3, "%s: cannot open description file", desc);
557
if (file = *argv)
558
argv++;
559
do
560
{
561
if (!file || streq(file, "-"))
562
fp = sfstdin;
563
else if (!(fp = sfopen(NiL, file, "r")))
564
error(ERROR_SYSTEM|3, "%s: cannot read", file);
565
if (desc)
566
describe(dp, desc, fp, file, verbose);
567
else
568
rectify(fp, file, verbose);
569
} while (file = *argv++);
570
return error_info.errors != 0;;
571
}
572
573