Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vcdelta/vcdelta.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vcdhdr.h"
21
22
/* Delta compression based on the Vcdiff data format.
23
**
24
** Written by Kiem-Phong Vo ([email protected])
25
*/
26
27
#define SLOP 64 /* max overhead space needed for output */
28
29
#define SFXSORT 00001 /* use suffix sorting for lz-parsing */
30
#define FREETABLE 00010 /* free the coding table on closing */
31
32
static Vcmtarg_t _Diffargs[] =
33
{ { "s", "String matching via suffix sorting.", (Void_t*)SFXSORT },
34
{ 0 , "String matching via hashing.", 0 }
35
};
36
37
#if __STD_C
38
static void vcdflsadd(Vcdiff_t* vcd, ssize_t dt, ssize_t az)
39
#else
40
static void vcdflsadd(vcd, dt, az)
41
Vcdiff_t* vcd;
42
ssize_t dt;
43
ssize_t az;
44
#endif
45
{
46
int cd;
47
Vcchar_t *data;
48
Vcdtable_t* tbl = vcd->table;
49
Vcdsize_t* siz = vcd->size;
50
Vcdindex_t* idx = vcd->index;
51
52
cd = idx->add[az <= siz->add ? az : 0];
53
vcioputc(vcd->inst, cd);
54
55
if(tbl->code[cd].inst1.size == 0)
56
vcioputu(vcd->inst, az);
57
58
data = dt < vcd->vcpa.nsrc ? vcd->vcpa.src + dt :
59
vcd->vcpa.tar + (dt - vcd->vcpa.nsrc);
60
vcioputs(vcd->data, data, az);
61
}
62
63
#if __STD_C
64
static void vcdflsaddr(Vcdiff_t* vcd, ssize_t ad, ssize_t md)
65
#else
66
static void vcdflsaddr(vcd, ad, md)
67
Vcdiff_t* vcd;
68
ssize_t ad;
69
ssize_t md;
70
#endif
71
{
72
if(md <= VCD_HERE+vcd->cache->s_near)
73
vcioputu(vcd->addr, ad);
74
else vcioputc(vcd->addr, ad); /* must be same[] cache */
75
}
76
77
#if __STD_C
78
static void vcdflscopy(Vcdiff_t* vcd, ssize_t ad, ssize_t cz, ssize_t md)
79
#else
80
static void vcdflscopy(vcd, ad, cz, md)
81
Vcdiff_t* vcd;
82
ssize_t ad;
83
ssize_t cz;
84
ssize_t md;
85
#endif
86
{
87
int cd;
88
Vcdtable_t* tbl = vcd->table;
89
Vcdsize_t* siz = vcd->size;
90
Vcdindex_t* idx = vcd->index;
91
92
cd = idx->copy[md][cz <= siz->copy[md] ? cz : 0];
93
vcioputc(vcd->inst, cd);
94
if(tbl->code[cd].inst1.size == 0)
95
vcioputu(vcd->inst, cz);
96
97
vcdflsaddr(vcd, ad, md);
98
}
99
100
#if __STD_C
101
static ssize_t vcdputcopy(Vcdiff_t* vcd, ssize_t dt, ssize_t cz, ssize_t mt)
102
#else
103
static ssize_t vcdputcopy(vcd, dt, cz, mt)
104
Vcdiff_t* vcd;
105
ssize_t dt; /* data position */
106
ssize_t cz; /* size of matched data */
107
ssize_t mt; /* matched position */
108
#endif
109
{
110
ssize_t ad, md, oz;
111
int cd;
112
Vcchar_t *data;
113
Vcdsave_t* sav = vcd->save;
114
Vcdsize_t* siz = vcd->size;
115
Vcdindex_t* idx = vcd->index;
116
117
if((oz = cz) > 0) /* compute values to be encoded */
118
ad = vcdkasetaddr(vcd->cache, mt, dt, &md);
119
else ad = md = -1;
120
121
if(sav->dtsz > 0)
122
{ if(sav->mode >= 0) /* saved COPY, no merged COPY+COPY */
123
vcdflscopy(vcd, sav->addr, sav->dtsz, sav->mode);
124
else /* saved ADD */
125
{ if(cz <= 0 || sav->dtsz > siz->add1[md] || cz > siz->copy2[md])
126
vcdflsadd(vcd, sav->addr, sav->dtsz);
127
else
128
{ cd = idx->addcopy[sav->dtsz][md][cz];
129
vcioputc(vcd->inst, cd);
130
131
if(sav->addr < vcd->vcpa.nsrc)
132
data = vcd->vcpa.src + sav->addr;
133
else data = vcd->vcpa.tar + (sav->addr - vcd->vcpa.nsrc);
134
vcioputs(vcd->data, data, sav->dtsz);
135
136
vcdflsaddr(vcd, ad, md); cz = 0;
137
}
138
}
139
}
140
141
sav->dtsz = cz; /* save current COPY instruction */
142
sav->addr = ad;
143
sav->mode = md;
144
145
return oz;
146
}
147
148
#if __STD_C
149
static ssize_t vcdputadd(Vcdiff_t* vcd, ssize_t dt, ssize_t az)
150
#else
151
static ssize_t vcdputadd(vcd, dt, az)
152
Vcdiff_t* vcd;
153
ssize_t dt;
154
ssize_t az;
155
#endif
156
{
157
int cd;
158
Vcchar_t *data;
159
Vcdsave_t* sav = vcd->save;
160
Vcdsize_t* siz = vcd->size;
161
Vcdindex_t* idx = vcd->index;
162
ssize_t oz = az;
163
164
if(sav->dtsz > 0) /* there is a saved instruction */
165
{ if(sav->mode < 0) /* parsing error? no two ADDs in a row possible */
166
return -1;
167
168
if(sav->dtsz <= siz->copy1[sav->mode] && az <= siz->add2[sav->mode])
169
{ cd = idx->copyadd[sav->mode][sav->dtsz][az];
170
vcioputc(vcd->inst, cd);
171
172
vcdflsaddr(vcd, sav->addr, sav->mode);
173
174
if(dt < vcd->vcpa.nsrc)
175
data = vcd->vcpa.src + dt;
176
else data = vcd->vcpa.tar + (dt - vcd->vcpa.nsrc);
177
vcioputs(vcd->data, data, az); az = 0;
178
}
179
else vcdflscopy(vcd, sav->addr, sav->dtsz, sav->mode);
180
}
181
182
sav->dtsz = az; /* save current ADD instruction */
183
sav->addr = dt;
184
sav->mode = -1;
185
186
return oz;
187
}
188
189
#if __STD_C
190
static ssize_t vcdputinst(Vclzparse_t* vcpa, int type, Vclzmatch_t* mtch, ssize_t n)
191
#else
192
static ssize_t vcdputinst(vcpa, type, mtch, n)
193
Vclzparse_t* vcpa;
194
int type; /* type of instruction */
195
Vclzmatch_t* mtch; /* list of matched fragments */
196
ssize_t n; /* number of fragments */
197
#endif
198
{
199
ssize_t len, l, here;
200
Vcdiff_t *vcd = (Vcdiff_t*)vcpa;
201
202
#if 0
203
{ Vcmatch_t* m;
204
if((m = mtch)->mpos < 0)
205
{ PRINT(2, "add %6d ", mtch->size);
206
PRINT(2, "pos %6d\n", mtch->tpos - vcpa->nsrc);
207
}
208
if(m->mpos >= 0 || n > 1)
209
{ if(m->mpos < 0)
210
m += 1;
211
PRINT(2, "cpy %6d ", mtch[n-1].tpos+mtch[n-1].size - m->tpos);
212
PRINT(2, "pos %6d ", m->tpos - vcpa->nsrc);
213
PRINT(2, "adr %6d ", m->mpos);
214
if((l = n - (m == mtch ? 0 : 1)) > 1 )
215
PRINT(2,"#frags %6d", l);
216
WRITE(2,"\n",1);
217
}
218
}
219
#endif
220
for(len = 0, here = mtch->tpos; n > 0; ++mtch, --n)
221
{ if((l = mtch->tpos - here) > 0)
222
{ if(vcdputadd(vcd, here, l) != l )
223
return -1;
224
len += l;
225
here += l;
226
}
227
228
if(mtch->size <= 0)
229
continue;
230
else if(mtch->mpos < 0)
231
{ if((l = vcdputadd(vcd, mtch->tpos, mtch->size)) != mtch->size )
232
return -1;
233
len += l;
234
here += l;
235
}
236
else
237
{ if((l = vcdputcopy(vcd, mtch->tpos, mtch->size, mtch->mpos)) != mtch->size)
238
return -1;
239
len += l;
240
here += l;
241
}
242
}
243
244
return len;
245
}
246
247
#if __STD_C
248
static ssize_t vcddiff(Vcodex_t* vc, const Void_t* tar, size_t ntar, Void_t** del)
249
#else
250
static ssize_t vcddiff(vc, tar, ntar, del)
251
Vcodex_t* vc;
252
Void_t* tar;
253
size_t ntar;
254
Void_t** del;
255
#endif
256
{
257
ssize_t i, a, d, n, nsrc;
258
Vcchar_t *rd, *ri, *ra, *p, ctrl, *output;
259
Vcio_t inst, addr, data;
260
Vcdsave_t save;
261
Vcdiff_t *vcd = vcgetmtdata(vc, Vcdiff_t*);
262
Vcdisc_t *disc = vcgetdisc(vc);
263
264
if(ntar == 0)
265
return 0;
266
267
nsrc = disc ? disc->size : 0;
268
vcd->vcpa.src = nsrc == 0 ? NIL(Vcchar_t*) : (Vcchar_t*)disc->data;
269
vcd->vcpa.nsrc = nsrc;
270
vcd->vcpa.tar = (Vcchar_t*)tar;
271
vcd->vcpa.ntar = ntar;
272
vcd->vcpa.mmin = COPYMIN;
273
vcd->vcpa.cmap = NIL(Vcchar_t*);
274
vcd->vcpa.type = 0;
275
276
/* obtain output buffer */
277
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), 3*(ntar+SLOP), 0)) )
278
return -1;
279
vcd->data = &data; vcioinit(&data, output+SLOP, ntar);
280
vcd->inst = &inst; vcioinit(&inst, vcionext(&data)+ntar, ntar+SLOP);
281
vcd->addr = &addr; vcioinit(&addr, vcionext(&inst)+ntar+SLOP, ntar+SLOP);
282
283
/* initialize address caches and saved COPY instruction */
284
vcdkaclear(vcd->cache);
285
vcd->save = &save; save.dtsz = save.addr = save.mode = 0;
286
287
vclzparse(&vcd->vcpa, (vcd->flags&SFXSORT) ? -1 : 32*1024);
288
vcdputcopy(vcd, 0, 0, 0);
289
290
ctrl = 0;
291
d = vciosize(vcd->data); rd = vciodata(vcd->data);
292
i = vciosize(vcd->inst); ri = vciodata(vcd->inst);
293
a = vciosize(vcd->addr); ra = vciodata(vcd->addr);
294
if(vc->coder) /* continuation coding */
295
{ if((n = vcapply(vc->coder, rd, d, &p)) >= 0 && n < d )
296
{ d = n; rd = p;
297
ctrl |= VCD_DATACOMPRESS;
298
}
299
if((n = vcapply(vc->coder, ri, i, &p)) >= 0 && n < i )
300
{ i = n; ri = p;
301
ctrl |= VCD_INSTCOMPRESS;
302
}
303
if((n = vcapply(vc->coder, ra, a, &p)) >= 0 && n < a )
304
{ a = n; ra = p;
305
ctrl |= VCD_ADDRCOMPRESS;
306
}
307
}
308
309
/* output data */
310
vcioinit(&data, output, d+i+a+SLOP);
311
vcioputu(&data, ntar);
312
vcioputc(&data, ctrl);
313
vcioputu(&data, d);
314
vcioputu(&data, i);
315
vcioputu(&data, a);
316
vcioputs(&data, rd, d);
317
vcioputs(&data, ri, i);
318
vcioputs(&data, ra, a);
319
n = vciosize(&data);
320
321
if(!(output = vcbuffer(vc, output, n, -1)) ) /* truncate to size */
322
return -1;
323
if(del)
324
*del = output;
325
return n;
326
}
327
328
329
#if __STD_C
330
static ssize_t vcdundiff(Vcodex_t* vc, const Void_t* del, size_t ndel, Void_t** out)
331
#else
332
static ssize_t vcdundiff(vc, del, ndel, out)
333
Vcodex_t* vc;
334
Void_t* del;
335
size_t ndel;
336
Void_t** out;
337
#endif
338
{
339
Vcchar_t *t, *s;
340
ssize_t d, i, a, ntar, nsrc;
341
Vcchar_t *tar, *etar, *src, *rd, *ri, *ra;
342
Vcio_t data, inst, addr;
343
Vcdcode_t *code;
344
Vcdcache_t *ka;
345
int ctrl;
346
Vcdiff_t *vcd = vcgetmtdata(vc, Vcdiff_t*);
347
Vcdisc_t *disc = vcgetdisc(vc);
348
349
/* read size of data buffers */
350
vcioinit(&data, del, ndel);
351
ntar = (ssize_t)vciogetu(&data);/* buffer size for target data */
352
ctrl = (int)vciogetc(&data); /* to see if datasets were compressed */
353
d = (ssize_t)vciogetu(&data); /* size of unmatched data */
354
i = (ssize_t)vciogetu(&data); /* size of instruction set */
355
a = (ssize_t)vciogetu(&data); /* size of COPY addresses */
356
357
/* make sure we have enough data for decoding */
358
if((d+i+a) != vciomore(&data) )
359
RETURN(-1);
360
361
/* data, instructions and COPY addresses */
362
rd = vcionext(&data);
363
ri = rd+d;
364
ra = ri+i;
365
366
/* recompute the data, instruction and address streams if encoded */
367
if(vc->coder)
368
{ if(ctrl&VCD_DATACOMPRESS)
369
{ if((d = vcapply(vc->coder, rd, d, &rd)) < 0 )
370
RETURN(-1);
371
}
372
if(ctrl&VCD_INSTCOMPRESS)
373
{ if((i = vcapply(vc->coder, ri, i, &ri)) < 0 )
374
RETURN(-1);
375
}
376
if(ctrl&VCD_ADDRCOMPRESS)
377
{ if((a = vcapply(vc->coder, ra, a, &ra)) < 0 )
378
RETURN(-1);
379
}
380
}
381
382
vcioinit(&data, rd, d);
383
vcioinit(&inst, ri, i);
384
vcioinit(&addr, ra, a);
385
386
code = vcd->table->code;
387
if((ka = vcd->cache) )
388
vcdkaclear(ka);
389
390
/* buffer to reconstruct data */
391
if(!(tar = vcbuffer(vc, NIL(Vcchar_t*), ntar, 0)) )
392
RETURN(-1);
393
etar = tar + ntar;
394
nsrc = disc ? disc->size : 0;
395
src = disc ? (Vcchar_t*)disc->data : NIL(Vcchar_t*);
396
397
for(t = tar; t < etar; )
398
{ Vcdcode_t* cd;
399
Vcdinst_t* in;
400
ssize_t sz;
401
402
/* get the pair of instructions */
403
if(vciomore(&inst) <= 0)
404
RETURN(-1);
405
cd = code + vciogetc(&inst);
406
407
for(i = 0; i < 2; ++i)
408
{ in = i == 0 ? &cd->inst1 : &cd->inst2;
409
if(in->type == VCD_NOOP)
410
continue;
411
412
if((sz = in->size) == 0)
413
{ if(vciomore(&inst) <= 0)
414
RETURN(-1);
415
sz = vciogetu(&inst);
416
}
417
418
if(t+sz > etar)
419
RETURN(-1);
420
421
if(in->type == VCD_BYTE)
422
{ d = in->mode;
423
for(; sz > 0; --sz)
424
*t++ = (Vcchar_t)d;
425
}
426
else if(in->type == VCD_COPY)
427
{ if(vciomore(&addr) <= 0)
428
RETURN(-1);
429
d = vcdkagetaddr(ka,&addr,(t-tar)+nsrc,in->mode);
430
if(d < nsrc)
431
{ if(d+sz > nsrc)
432
RETURN(-1);
433
s = src+d;
434
}
435
else
436
{ if((d -= nsrc) >= (t-tar) || (d+sz) > ntar)
437
RETURN(-1);
438
s = tar+d;
439
}
440
for(; sz > 0; --sz)
441
*t++ = *s++;
442
}
443
else if(in->type == VCD_ADD)
444
{ if(vciomore(&data) < sz)
445
RETURN(-1);
446
vciogets(&data, t, sz);
447
t += sz;
448
}
449
else if(in->type == VCD_RUN)
450
{ if(vciomore(&data) <= 0)
451
RETURN(-1);
452
d = vciogetc(&data);
453
for(; sz > 0; --sz)
454
*t++ = (Vcchar_t)d;
455
}
456
else RETURN(-1);
457
}
458
}
459
460
if(vciomore(&data) != 0 || vciomore(&inst) != 0 || vciomore(&addr) != 0)
461
return -1;
462
463
if(out)
464
*out = (Void_t*)tar;
465
466
return ntar;
467
}
468
469
#if __STD_C
470
static int vcdevent(Vcodex_t* vc, int type, Void_t* init)
471
#else
472
static int vcdevent(vc, type, init)
473
Vcodex_t* vc;
474
int type;
475
Void_t* init;
476
#endif
477
{
478
Vcdiff_t *vcd;
479
Vcmtarg_t *arg;
480
481
_vcdtblinit(); /* initialize default code tables */
482
483
if(type == VC_OPENING)
484
{ if(!(vcd = (Vcdiff_t*)calloc(1,sizeof(Vcdiff_t))) )
485
return -1;
486
VCDINIT(vcd);
487
488
vcgetmtarg((char*)init, 0, 0, _Diffargs, &arg);
489
if(arg)
490
vcd->flags |= TYPECAST(int,arg->data);
491
492
vcd->table = _Vcdtbl;
493
vcd->index = &_Vcdindex;
494
vcd->size = &_Vcdsize;
495
vcd->vcpa.parsef = vcdputinst;
496
if(!(vcd->cache = vcdkaopen(vcd->table->s_near,vcd->table->s_same)))
497
{ free(vcd);
498
return -1;
499
}
500
501
vcsetmtdata(vc, vcd);
502
return 0;
503
}
504
else if(type == VC_CLOSING)
505
{ if((vcd = vcgetmtdata(vc, Vcdiff_t*)) )
506
{ if(vcd->cache)
507
vcdkaclose(vcd->cache);
508
if(vcd->table && (vcd->flags&FREETABLE) )
509
free(vcd->table);
510
free(vcd);
511
}
512
513
vcsetmtdata(vc, NIL(Vcdiff_t*));
514
return 0;
515
}
516
else return 0;
517
}
518
519
Vcmethod_t _Vcdelta =
520
{ vcddiff,
521
vcdundiff,
522
vcdevent,
523
"delta", "(Delta) Compression using the Vcdiff format.",
524
"[-version?delta (AT&T Research) 2003-01-01]" USAGE_LICENSE,
525
_Diffargs,
526
1024*1024,
527
VC_MTSOURCE
528
};
529
530
VCLIB(Vcdelta)
531
532