Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libpz/pzconvert.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1998-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
22
/*
 * pzip conversion/checksum discipline
 *
 *	pz->discdata	State_t*
 *	pp->discdata	Cvt_t*
 */
28
29
static const char usage[] =
30
"[-1l?\n@(#)$Id: pzip conversion library (AT&T Research) 1999-09-11 $\n]"
31
USAGE_LICENSE
32
"[+LIBRARY?pzconvert - pzip conversion library]"
33
"[+DESCRIPTION?The \bpzip\b convert discipline supports runtime record"
34
" format conversion. The discipline is enabled by a \b--library\b"
35
" that provides a conversion table and functions to \bpzconvert\b(3).]"
36
"[x:checksum?Enables a decompressed data checksum. The checksum is appended"
37
" to the compressed data as a \bpzip\b trailer. This checksum is"
38
" checked on decompression and a diagnostic is issued on mismatch."
39
" The absence of a checksum trailer is not treated as an error unless"
40
" \b--checksum=warn\b is specified.]:?[warn]"
41
"[c:convert?Specifies the input format for compression and the output format"
42
" for decompression. \aformat\a may be omitted for self-identifying"
43
" input data (e.g., \bpzip\b files.)]:?[format]"
44
"[f:from?Specifies the input format for data that does not self-identify.]:"
45
" [format]"
46
"[s:show?Lists the conversion steps, if any, on the standard output, and exits."
47
" A diagnostic is issued if the conversion is not implemented. If"
48
" \ball\b is specified then a description of all supported formats is"
49
" listed on the standard output and \bpzip\b exits.]:?[all]"
50
"[t:to?Specifies the decompression output format. This option is not needed"
51
" for compression since the partition file determines the output"
52
" format.]:[format]"
53
"[v:verbose?Emits a message on the standard error if checksumming is enabled"
54
" and another message showing the conversions being applied, if any.]"
55
;
56
57
#include "pzlib.h"
58
59
/*
 * op code that tags the checksum record in the pzip trailer
 */

#define CHECKSUM_OP	SFDCEVENT('P','Z',1)

/*
 * State_t.flags and Cvt_t.flags bits
 * the constants are unsigned long to match the flags members
 */

#define CHECKSUM	(1UL<<0)	/* checksum enabled		*/
#define CHECKSUM_TAIL	(1UL<<1)	/* CHECKSUM tail handled	*/
#define CHECKSUM_WARN	(1UL<<2)	/* warn if no checksum trailer	*/
#define CONVERT		(1UL<<3)	/* conversion enabled		*/
#define SHOW		(1UL<<4)	/* show conversions and exit	*/
66
67
struct Chain_s; typedef struct Chain_s Chain_t;
68
69
struct Chain_s
70
{
71
Chain_t* next;
72
Pzconvert_t* convert;
73
unsigned char* buf;
74
};
75
76
typedef struct
77
{
78
unsigned long flags;
79
uint32_t checksum;
80
Chain_t* chain;
81
Chain_t* last;
82
} Cvt_t;
83
84
typedef struct
85
{
86
unsigned long flags;
87
char* from;
88
char* to;
89
unsigned char* buf;
90
Pzconvert_t* conversions;
91
Pzread_f readf;
92
Pzwrite_f writef;
93
Pzevent_f eventf;
94
Sfio_t* tmp;
95
} State_t;
96
97
/*
 * compute the incremental linear congruential hash checksum
 *
 * sum is the checksum so far (0 to start) and buf,size the next
 * run of bytes; each byte c replaces sum with (sum<<4)+sum+c+97531
 * stored back into a uint32_t, and the updated checksum is returned
 * size==0 returns sum unchanged
 */

static uint32_t
memsum_4(register uint32_t sum, const void* buf, size_t size)
{
	register const unsigned char*	s = (const unsigned char*)buf;
	register const unsigned char*	e = s + size;

	while (s < e)
		sum = (sum << 4) + sum + *s++ + 97531;
	return sum;
}
111
112
/*
113
* read a row from sp and apply the conversion and/or checksum
114
* before it is deflated
115
*/
116
117
static ssize_t
118
cvtread(Pz_t* pz, Sfio_t* sp, void* data, Pzdisc_t* disc)
119
{
120
register State_t* state = (State_t*)pz->discdata;
121
register Pzpart_t* pp = pz->part;
122
register Cvt_t* cvt = (Cvt_t*)pp->discdata;
123
register Chain_t* cp;
124
ssize_t n;
125
unsigned char* s;
126
unsigned char* t;
127
128
if (cvt->flags & CONVERT)
129
{
130
cvt->last->buf = (unsigned char*)data;
131
again:
132
cp = cvt->chain;
133
if (state->readf)
134
{
135
s = state->buf;
136
if ((n = (*state->readf)(pz, sp, s, disc)) <= 0)
137
return n;
138
}
139
else if (!(s = (unsigned char*)sfreserve(sp, cp->convert->from->row, 0)))
140
return sfvalue(sp) ? -1 : 0;
141
for (; cp; cp = cp->next)
142
{
143
pz->count.converted++;
144
t = cp->buf;
145
if ((n = (*cp->convert->convertf)(pz, cp->convert, s, t, disc)) <= 0)
146
{
147
if (n < 0)
148
return -1;
149
goto again;
150
}
151
s = t;
152
}
153
}
154
else
155
n = state->readf ? (*state->readf)(pz, sp, data, disc) : sfread(sp, data, pp->row);
156
if (n > 0 && (cvt->flags & CHECKSUM))
157
cvt->checksum = memsum_4(cvt->checksum, data, n);
158
return n;
159
}
160
161
/*
162
* apply the conversion and/or checksum to a row that was just inflated
163
* and write it to sp
164
*/
165
166
static ssize_t
167
cvtwrite(Pz_t* pz, Sfio_t* sp, const void* data, Pzdisc_t* disc)
168
{
169
register State_t* state = (State_t*)pz->discdata;
170
register Pzpart_t* pp = pz->part;
171
register Cvt_t* cvt = (Cvt_t*)pp->discdata;
172
register Chain_t* cp;
173
ssize_t n;
174
unsigned char* b;
175
unsigned char* s;
176
unsigned char* t;
177
178
if (cvt->flags & CONVERT)
179
{
180
if (state->writef)
181
b = state->buf;
182
else if (!(b = (unsigned char*)sfreserve(sp, cvt->last->convert->to->row, 1)))
183
return -1;
184
cvt->last->buf = b;
185
s = (unsigned char*)data;
186
for (cp = cvt->chain; cp; cp = cp->next)
187
{
188
pz->count.converted++;
189
t = cp->buf;
190
if ((n = (*cp->convert->convertf)(pz, cp->convert, s, t, disc)) <= 0)
191
{
192
if (!state->writef)
193
sfwrite(sp, b, 0);
194
return n;
195
}
196
s = t;
197
}
198
}
199
else
200
{
201
if (cvt->flags & CHECKSUM)
202
cvt->checksum = memsum_4(cvt->checksum, data, pp->row);
203
b = (unsigned char*)data;
204
n = pp->row;
205
}
206
return state->writef ? (*state->writef)(pz, sp, b, disc) : sfwrite(sp, b, n);
207
}
208
209
/*
210
* add the next links to the stack that get from stk[i..j] => to
211
* the recursion unwind returns a shortest chain of links
212
* 0 returned if there is no path => to
213
*/
214
215
static Chain_t*
216
closure(Pz_t* pz, Chain_t* cp, int n, Pzconvert_t* tab, unsigned char* hit, Pzconvert_t** stk, int i, int j, const char* to)
217
{
218
int k;
219
int m;
220
Chain_t* xp;
221
222
for (k = j; i < j; i++)
223
for (m = 0; m < n; m++)
224
if (!hit[m] && streq(tab[m].from->name, stk[i]->to->name))
225
{
226
if (streq(tab[m].to->name, to))
227
{
228
stk[k] = &tab[m];
229
m = k;
230
goto found;
231
}
232
stk[k++] = &tab[m];
233
hit[m] = 1;
234
}
235
if (k == j || !(cp = closure(pz, cp, n, tab, hit, stk, j, k, to)))
236
return 0;
237
to = cp->convert->from->name;
238
for (m = j; m < k && !streq(stk[m]->to->name, to); m++);
239
if (m >= k)
240
{
241
if (pz->disc->errorf)
242
(*pz->disc->errorf)(pz, pz->disc, 2, "internal closure error -- %s not found on unwind stack", to);
243
return 0;
244
}
245
found:
246
xp = vmnewof(pz->vm, 0, Chain_t, 1, 0);
247
xp->next = cp;
248
xp->convert = stk[m];
249
return xp;
250
}
251
252
/*
253
* determine a shortest path of conversions from f => t in tab of n elements
254
*/
255
256
static Chain_t*
257
chain(Pz_t* pz, Pzconvert_t* tab, int n, const char* f, const char* t)
258
{
259
register State_t* state = (State_t*)pz->discdata;
260
int i;
261
int j;
262
size_t m;
263
unsigned char* a;
264
unsigned char* b;
265
unsigned char* x;
266
Chain_t* cp;
267
Chain_t* tp;
268
unsigned char* hit;
269
Pzconvert_t** stk;
270
271
if (!(hit = newof(0, unsigned char, n, 0)))
272
return 0;
273
if (!(stk = newof(0, Pzconvert_t*, n, 0)))
274
{
275
free(hit);
276
return 0;
277
}
278
for (i = j = 0; i < n; i++)
279
if (streq(tab[i].from->name, f))
280
{
281
stk[j++] = &tab[i];
282
hit[i] = 1;
283
}
284
if (!j || !(cp = closure(pz, NiL, n, tab, hit, stk, 0, j, t)))
285
{
286
free(hit);
287
free(stk);
288
if (pz->disc->errorf)
289
{
290
if (!j)
291
(*pz->disc->errorf)(pz, pz->disc, 2, "conversion to %s not implemented", t);
292
else
293
(*pz->disc->errorf)(pz, pz->disc, 2, "conversion from %s to %s not implemented", f, t);
294
}
295
return 0;
296
}
297
free(hit);
298
t = cp->convert->from->name;
299
for (i = 0; i < j && !streq(stk[i]->to->name, t); i++);
300
if (i >= j)
301
{
302
free(stk);
303
if (pz->disc->errorf)
304
(*pz->disc->errorf)(pz, pz->disc, 2, "internal closure error -- %s not found on unwind stack", t);
305
return 0;
306
}
307
tp = vmnewof(pz->vm, 0, Chain_t, 1, 0);
308
tp->next = cp;
309
tp->convert = stk[i];
310
cp = tp;
311
free(stk);
312
313
/*
314
* determine the largest convert to row size
315
* and allocate the temporary buffers
316
*/
317
318
m = 0;
319
for (tp = cp; tp && tp->next; tp = tp->next)
320
if (tp->convert->to->row > m)
321
m = tp->convert->to->row;
322
if (m)
323
{
324
a = b = 0;
325
for (tp = cp; tp && tp->next; tp = tp->next)
326
{
327
if (!a && !(a = vmnewof(pz->vm, 0, unsigned char, m, 0)))
328
return 0;
329
tp->buf = a;
330
x = a;
331
a = b;
332
b = x;
333
}
334
if ((state->readf || state->writef) && !(state->buf = vmnewof(pz->vm, 0, unsigned char, m, 0)))
335
return 0;
336
}
337
return cp;
338
}
339
340
/*
341
* handle pzip events
342
*/
343
344
static int
345
cvtevent(Pz_t* pz, int op, void* data, size_t size, Pzdisc_t* disc)
346
{
347
register State_t* state = (State_t*)pz->discdata;
348
register Pzpart_t* pp = pz->part;
349
register Cvt_t* cvt;
350
register Pzconvert_t* xp;
351
Chain_t* cp;
352
Pzconvert_t* zp;
353
Pzconvert_t* rp;
354
char* f;
355
char* t;
356
char** vp;
357
int i;
358
size_t n;
359
unsigned long k;
360
Sfio_t* sp;
361
Pz_t* iz;
362
363
int r = 0;
364
365
if (state->eventf && (r = (*state->eventf)(pz, op, data, size, disc)))
366
return r;
367
if (!pp)
368
{
369
if (op == PZ_OPTION)
370
{
371
switch (optstr(NiL, usage))
372
{
373
case 'f':
374
if (!pz->row)
375
for (xp = state->conversions; ; xp++)
376
if (!xp->from)
377
{
378
if (disc->errorf)
379
(*disc->errorf)(pz, disc, 2, "%s: unknown format", opt_info.arg);
380
return -1;
381
}
382
else if (streq(opt_info.arg, xp->from->name))
383
{
384
pz->row = xp->from->row;
385
break;
386
}
387
else if (streq(opt_info.arg, xp->to->name))
388
{
389
pz->row = xp->to->row;
390
break;
391
}
392
break;
393
case 's':
394
if (opt_info.arg)
395
{
396
for (xp = state->conversions; xp->from; xp++);
397
i = (xp - state->conversions) * 2 + 1;
398
if (!(vp = vmnewof(pz->vm, 0, char*, i, 0)))
399
exit(1);
400
sfprintf(sfstdout, "%-16s %5s %s\n", "NAME", "ROW", "DESCRIPTION");
401
for (xp = state->conversions; xp->from; xp++)
402
{
403
for (i = 0; vp[i] && !streq(xp->from->name, vp[i]); i++);
404
if (!vp[i])
405
{
406
vp[i++] = (char*)xp->from->name;
407
sfprintf(sfstdout, "%-16s %5u %s\n", xp->from->name, xp->from->row, xp->from->description);
408
}
409
for (i = 0; vp[i] && !streq(xp->to->name, vp[i]); i++);
410
if (!vp[i])
411
{
412
vp[i++] = (char*)xp->to->name;
413
sfprintf(sfstdout, "%-16s %5u %s\n", xp->to->name, xp->to->row, xp->to->description);
414
}
415
}
416
exit(0);
417
}
418
break;
419
}
420
}
421
return 0;
422
}
423
if (!(cvt = (Cvt_t*)pp->discdata))
424
{
425
if (!(cvt = vmnewof(pz->vm, 0, Cvt_t, 1, 0)))
426
return -1;
427
pp->discdata = (void*)cvt;
428
}
429
switch (op)
430
{
431
case PZ_CLOSE:
432
if ((pz->flags & PZ_READ) && (cvt->flags & (CHECKSUM|CHECKSUM_WARN|CHECKSUM_TAIL)) == (CHECKSUM|CHECKSUM_WARN))
433
{
434
r = -1;
435
if (disc->errorf)
436
(*disc->errorf)(pz, disc, 1, "%s: no checksum -- expected 0x%08I*x", pz->path, sizeof(cvt->checksum), cvt->checksum);
437
}
438
if (cvt->flags & CONVERT)
439
for (cp = cvt->chain; cp; cp = cp->next)
440
if (cp->convert->eventf && (r = (*cp->convert->eventf)(pz, op, cp->convert, cp->next == 0, disc)))
441
break;
442
sfstrclose(state->tmp);
443
break;
444
case PZ_OPTION:
445
switch (optstr(NiL, usage))
446
{
447
case 'x':
448
r = 1;
449
state->flags |= CHECKSUM;
450
if (opt_info.arg)
451
state->flags |= CHECKSUM_WARN;
452
if (disc->errorf && (pz->flags & PZ_NOPZIP) && (state->flags & CHECKSUM_WARN))
453
(*disc->errorf)(pz, disc, 1, "%s: enabled for pzip data only", opt_info.name);
454
break;
455
case 'c':
456
case 'f':
457
case 't':
458
r = 1;
459
if (opt_info.arg && *opt_info.arg)
460
{
461
for (xp = state->conversions;; xp++)
462
{
463
if (!xp->from)
464
{
465
if (disc->errorf)
466
(*disc->errorf)(pz, disc, 2, "%s: unknown format", opt_info.arg);
467
return -1;
468
}
469
if (streq(opt_info.arg, xp->from->name) || streq(opt_info.arg, xp->to->name))
470
break;
471
}
472
switch (opt_info.option[1])
473
{
474
case 'c':
475
vp = (pz->flags & PZ_WRITE) ? &state->from : &state->to;
476
break;
477
case 'f':
478
vp = &state->from;
479
break;
480
case 't':
481
vp = &state->to;
482
break;
483
}
484
if ((!*vp || !streq(opt_info.arg, *vp)) && !(*vp = vmstrdup(pz->vm, opt_info.arg)))
485
return -1;
486
state->flags |= CONVERT;
487
}
488
else if (pz->flags & PZ_WRITE)
489
state->flags |= CONVERT;
490
break;
491
case 's':
492
r = 1;
493
state->flags |= SHOW;
494
break;
495
default:
496
if (cvt->flags & CONVERT)
497
for (cp = cvt->chain; cp; cp = cp->next)
498
if (cp->convert->eventf && (r = (*cp->convert->eventf)(pz, op, data, size, disc)))
499
break;
500
break;
501
}
502
break;
503
case PZ_PARTITION:
504
if (state->flags & CONVERT)
505
{
506
if (pz->flags & PZ_WRITE)
507
{
508
if (sfraise(pz->io, SFPZ_HANDLE, &iz) <= 0)
509
{
510
if (disc->errorf)
511
(*disc->errorf)(pz, disc, 2, "%s: cannot determine input format", pz->path);
512
return -1;
513
}
514
if (state->from && !*iz->part->name)
515
{
516
n = 0;
517
f = state->from;
518
}
519
else
520
{
521
n = iz->part->row;
522
f = iz->part->name;
523
}
524
t = pp->name;
525
}
526
else
527
{
528
if (!state->to)
529
{
530
if (disc->errorf)
531
(*disc->errorf)(pz, disc, 2, "ouput convert format omitted");
532
return -1;
533
}
534
if (state->from && !*pp->name)
535
{
536
n = 0;
537
f = state->from;
538
}
539
else
540
{
541
n = *pp->name ? 0 : pp->row;
542
f = pp->name;
543
}
544
t = state->to;
545
}
546
if (!streq(f, t))
547
{
548
cp = 0;
549
for (xp = state->conversions, rp = 0, zp = 0;; xp++)
550
{
551
if (!xp->from)
552
{
553
i = xp - state->conversions;
554
if (xp = zp)
555
{
556
if (n != xp->to->row)
557
{
558
if (!(cp = vmnewof(pz->vm, 0, Chain_t, 1, 0)))
559
return -1;
560
cp->convert = xp;
561
}
562
break;
563
}
564
if (rp)
565
break;
566
if (cp = chain(pz, state->conversions, i, f, t))
567
break;
568
return -1;
569
}
570
if (streq(t, xp->to->name))
571
{
572
if (streq(f, xp->from->name))
573
{
574
if (!(cp = vmnewof(pz->vm, 0, Chain_t, 1, 0)))
575
return -1;
576
cp->convert = xp;
577
break;
578
}
579
if (!zp && n == xp->from->row)
580
zp = xp;
581
if (!rp && n == xp->to->row)
582
rp = xp;
583
}
584
}
585
if (cvt->chain = cp)
586
{
587
cvt->flags |= CONVERT;
588
do
589
{
590
if (cp->convert->eventf && (*cp->convert->eventf)(pz, op, cp->convert, cp->next == 0, disc) < 0)
591
return -1;
592
cvt->last = cp;
593
} while (cp = cp->next);
594
}
595
}
596
}
597
if ((cvt->flags |= (state->flags & CHECKSUM)) & (CHECKSUM|CONVERT))
598
{
599
disc->readf = cvtread;
600
disc->writef = cvtwrite;
601
if (disc->errorf && (pz->flags & PZ_VERBOSE))
602
{
603
if (cvt->flags & CHECKSUM)
604
(*disc->errorf)(pz, disc, 0, "%s: %s checksum", pz->path, (pz->flags & PZ_WRITE) ? "generating" : "verifying");
605
if (cvt->flags & CONVERT)
606
{
607
sfprintf(pz->tmp, "%s", cvt->chain->convert->from->name);
608
for (cp = cvt->chain; cp; cp = cp->next)
609
sfprintf(pz->tmp, " => %s", cp->convert->to->name);
610
(*disc->errorf)(pz, disc, 0, "%s: convert: %s", pz->path, sfstruse(pz->tmp));
611
}
612
}
613
}
614
if (state->flags & SHOW)
615
{
616
if (cvt->flags & CHECKSUM)
617
{
618
sfprintf(sfstdout, "checksum");
619
if (cvt->flags & CONVERT)
620
sfprintf(sfstdout, " + ");
621
}
622
if (cvt->flags & CONVERT)
623
{
624
sfprintf(sfstdout, "%s", cvt->chain->convert->from->name);
625
for (cp = cvt->chain; cp; cp = cp->next)
626
sfprintf(sfstdout, " => %s", cp->convert->to->name);
627
}
628
if (cvt->flags & (CHECKSUM|CONVERT))
629
sfprintf(sfstdout, "\n");
630
exit(0);
631
}
632
break;
633
case PZ_REOPEN:
634
cvt->checksum = 0;
635
cvt->flags &= ~CHECKSUM_TAIL;
636
break;
637
case PZ_TAILREAD:
638
if (cvt->flags & CHECKSUM)
639
{
640
sfstrbuf(state->tmp, data, size, 0);
641
if (sfgetu(state->tmp) == CHECKSUM_OP)
642
{
643
cvt->flags |= CHECKSUM_TAIL;
644
if ((k = sfgetu(state->tmp)) != cvt->checksum)
645
{
646
if (disc->errorf)
647
(*disc->errorf)(pz, disc, 2, "%s: checksum mismatch -- expected 0x%08I*x != 0x%08I*x", pz->path, sizeof(k), k, sizeof(cvt->checksum), cvt->checksum);
648
pz->flags |= PZ_ERROR;
649
r = -1;
650
}
651
else
652
r = 1;
653
}
654
}
655
break;
656
case PZ_TAILWRITE:
657
if (cvt->flags & CHECKSUM)
658
{
659
cvt->flags |= CHECKSUM_TAIL;
660
sp = (Sfio_t*)data;
661
sfputu(state->tmp, CHECKSUM_OP);
662
sfputu(state->tmp, cvt->checksum);
663
n = sfstrtell(state->tmp);
664
sfputu(sp, n);
665
sfwrite(sp, sfstrseek(state->tmp, 0, SEEK_SET), n);
666
}
667
break;
668
}
669
return r;
670
}
671
672
/*
673
* install the conversion/checksum table and event function
674
*/
675
676
int
677
pzdcconvert(Pz_t* pz, const Pzconvert_t* conversions)
678
{
679
register State_t* state;
680
681
if (pz->disc->eventf != cvtevent && !(pz->flags & PZ_PUSHED))
682
{
683
if (!(state = vmnewof(pz->vm, 0, State_t, 1, 0)))
684
return -1;
685
if (!(state->tmp = sfstropen()))
686
{
687
vmfree(pz->vm, state);
688
return -1;
689
}
690
state->conversions = (Pzconvert_t*)conversions;
691
state->readf = pz->disc->readf;
692
state->writef = pz->disc->writef;
693
state->eventf = pz->disc->eventf;
694
pz->discdata = (void*)state;
695
pz->disc->eventf = cvtevent;
696
optget(NiL, usage);
697
}
698
return 0;
699
}
700
701