Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvdelta/vddelta.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1995-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vdelhdr.h"
21
22
/* Compute a transformation that takes source data to target data
23
**
24
** Written by Kiem-Phong Vo, [email protected], 5/20/94
25
*/
26
27
#ifdef DEBUG
28
long S_copy, S_add; /* amount of input covered by COPY and ADD */
29
long N_copy, N_add; /* # of COPY and ADD instructions */
30
long M_copy, M_add; /* max size of a COPY or ADD instruction */
31
long N_merge; /* # of merged instructions */
32
#endif
33
34
#define MERGABLE(a,c,k) ((a) > 0 && A_ISTINY(a) && \
35
(c) > 0 && C_ISTINY(c) && \
36
(k) >= K_SELF )
37
38
typedef struct _table_s Table_t;
39
struct _table_s
40
{ Vdio_t io; /* io structure */
41
uchar* src; /* source string */
42
int n_src;
43
uchar* tar; /* target string */
44
int n_tar;
45
K_DDECL(quick,recent,rhere); /* address caches */
46
int* link; /* links of elements */
47
int size; /* size of hash table */
48
int* hash; /* hash table */
49
};
50
51
/* encode and output delta instructions */
52
#if __STD_C
53
static int vdputinst(Table_t* tab, uchar* begs, uchar* here, reg int copy, int n_copy)
54
#else
55
static int vdputinst(tab, begs, here, copy, n_copy)
56
Table_t* tab;
57
uchar* begs; /* ADD data if any */
58
uchar* here; /* current location */
59
reg int copy; /* best match if >= 0 */
60
int n_copy; /* length of match */
61
#endif
62
{
63
reg int n_add, i_add, i_copy, k_type;
64
reg int n, c_addr, best, d;
65
66
n_add = begs ? here-begs : 0; /* add size */
67
c_addr = (here-tab->tar)+tab->n_src; /* current address */
68
k_type = 0;
69
70
if(n_copy > 0) /* process the COPY instruction */
71
{ /**/DBTOTAL(N_copy,1); DBTOTAL(S_copy,n_copy); DBMAX(M_copy,n_copy);
72
73
best = copy;
74
k_type = K_SELF;
75
if((d = c_addr - copy) < best)
76
{ best = d;
77
k_type = K_HERE;
78
}
79
for(n = 0; n < K_RTYPE; ++n)
80
{ if((d = copy - tab->recent[n]) < 0 || d >= best)
81
continue;
82
best = d;
83
k_type = K_RECENT+n;
84
}
85
if(best >= I_MORE && tab->quick[n = copy%K_QSIZE] == copy)
86
{ for(d = K_QTYPE-1; d > 0; --d)
87
if(n >= (d<<VD_BITS) )
88
break;
89
best = n - (d<<VD_BITS); /**/ASSERT(best < (1<<VD_BITS));
90
k_type = K_QUICK+d;
91
}
92
93
/**/ASSERT(best >= 0);
94
/**/ASSERT((k_type+K_MERGE) < (1<<I_BITS) );
95
96
/* update address caches */
97
K_UPDATE(tab->quick,tab->recent,tab->rhere,copy);
98
99
/* see if mergable to last ADD instruction */
100
if(MERGABLE(n_add,n_copy,k_type) )
101
{ /**/DBTOTAL(N_merge,1);
102
i_add = K_TPUT(k_type)|A_TPUT(n_add)|C_TPUT(n_copy);
103
}
104
else
105
{ i_copy = K_PUT(k_type);
106
if(C_ISLOCAL(n_copy) )
107
i_copy |= C_LPUT(n_copy);
108
}
109
}
110
111
if(n_add > 0)
112
{ /**/DBTOTAL(N_add,1); DBTOTAL(S_add,n_add); DBMAX(M_add,n_add);
113
114
if(!MERGABLE(n_add,n_copy,k_type) )
115
i_add = A_ISLOCAL(n_add) ? A_LPUT(n_add) : 0;
116
117
if(VDPUTC((Vdio_t*)tab,i_add) < 0 )
118
return -1;
119
if(!A_ISLOCAL(n_add) &&
120
(*_Vdputu)((Vdio_t*)tab, (ulong)A_PUT(n_add)) < 0 )
121
return -1;
122
if((*_Vdwrite)((Vdio_t*)tab, begs, n_add) < 0 )
123
return -1;
124
}
125
126
if(n_copy > 0)
127
{ if(!MERGABLE(n_add,n_copy,k_type) && VDPUTC((Vdio_t*)tab,i_copy) < 0 )
128
return -1;
129
130
if(!C_ISLOCAL(n_copy) &&
131
(*_Vdputu)((Vdio_t*)tab, (ulong)C_PUT(n_copy)) < 0 )
132
return -1;
133
134
if(k_type >= K_QUICK && k_type < (K_QUICK+K_QTYPE) )
135
{ if(VDPUTC((Vdio_t*)tab,(uchar)best) < 0 )
136
return -1;
137
}
138
else
139
{ if((*_Vdputu)((Vdio_t*)tab, (ulong)best) < 0 )
140
return -1;
141
}
142
}
143
144
return 0;
145
}
146
147
148
/* Fold a string */
149
#if __STD_C
150
static int vdfold(Table_t* tab, reg uchar* fold, reg uchar* endfold, int target)
151
#else
152
static int vdfold(tab, fold, endfold, target)
153
Table_t* tab;
154
reg uchar* fold; /* start of area to fold */
155
reg uchar* endfold; /* end of area to fold */
156
int target; /* 1 if doing the target stream */
157
#endif
158
{
159
reg ulong key, n;
160
reg uchar *sm, *ends, *ss, *endh;
161
reg int m, list, curm, bestm;
162
reg uchar *add;
163
reg int head, len, n_src = tab->n_src;
164
reg uchar *src = tab->src, *tar = tab->tar;
165
reg int size = tab->size, *link = tab->link, *hash = tab->hash;
166
167
if((endfold-fold) < M_MIN) /* not much to do */
168
{ add = fold;
169
goto done;
170
}
171
172
if(target)
173
curm = (fold - tab->tar) + n_src;
174
else curm = (fold - tab->src);
175
bestm = -1;
176
len = M_MIN-1;
177
add = NIL(uchar*);
178
HINIT(key,fold,n);
179
for(;;)
180
{ for(;;) /* search for the longest match */
181
{ if((m = hash[key&size]) < 0)
182
goto endsearch;
183
list = m = link[m]; /* head of list */
184
185
if(bestm >= 0) /* skip over past elements */
186
for(n = bestm+len; m < n;)
187
if((m = link[m]) == list)
188
goto endsearch;
189
190
head = len - (M_MIN-1); /* header before the match */
191
endh = fold+head;
192
for(;;)
193
{ if((n = m) < n_src)
194
{ if(n < head)
195
goto next;
196
sm = src + n;
197
}
198
else
199
{ if((n -= n_src) < head)
200
goto next;
201
sm = tar + n;
202
}
203
204
/* make sure that the M_MIN bytes match */
205
if(!EQUAL(endh,sm))
206
goto next;
207
208
/* make sure this is a real match */
209
for(sm -= head, ss = fold; ss < endh; )
210
if(*sm++ != *ss++)
211
goto next;
212
213
/* extend forward as much as possible */
214
ss += M_MIN;
215
sm += M_MIN;
216
ends = endfold;
217
if(m < n_src && (n = (src+n_src)-sm) < (ends-fold) )
218
ends = fold+n;
219
for(;; ++ss, ++sm)
220
if(ss >= ends || *sm != *ss)
221
goto extend;
222
223
next: if((m = link[m]) == list )
224
goto endsearch;
225
}
226
227
extend: bestm = m-head;
228
n = len;
229
len = ss-fold;
230
if(ss >= endfold) /* already match everything */
231
goto endsearch;
232
233
/* check for a longer match */
234
ss -= M_MIN-1;
235
if(len == n+1)
236
HNEXT(key,ss,n);
237
else HINIT(key,ss,n);
238
}
239
240
endsearch:
241
if(bestm >= 0)
242
{ if(target && vdputinst(tab,add,fold,bestm,len) < 0)
243
return -1;
244
245
/* add a sufficient number of suffices */
246
ends = (fold += len);
247
ss = ends - (M_MIN-1);
248
if(target)
249
curm = n_src + (ss-tar);
250
else curm = (ss-src);
251
252
len = M_MIN-1;
253
add = NIL(uchar*);
254
bestm = -1;
255
}
256
else
257
{ if(!add)
258
add = fold;
259
ss = fold;
260
ends = (fold += 1); /* add one prefix */
261
}
262
263
if(ends > (endfold - (M_MIN-1)) )
264
ends = endfold - (M_MIN-1);
265
266
if(ss < ends) for(;;) /* add prefices/suffices */
267
{ n = key&size;
268
if((m = hash[n]) < 0 )
269
link[curm] = curm;
270
else
271
{ link[curm] = link[m];
272
link[m] = curm;
273
}
274
hash[n] = curm++;
275
276
if((ss += 1) >= ends)
277
break;
278
HNEXT(key,ss,n);
279
}
280
281
if((endfold-fold) < M_MIN) /* too short to match */
282
{ if(!add)
283
add = fold;
284
goto done;
285
}
286
287
HNEXT(key,fold,n);
288
}
289
290
done:
291
if(target)
292
{ if((len = (tab->tar+tab->n_tar) - endfold) > 0 ) /* match at end */
293
bestm = n_src - len;
294
else bestm = -1;
295
if(add || bestm >= 0)
296
return vdputinst(tab,add,endfold,bestm,len);
297
}
298
299
return 0;
300
}
301
302
#if __STD_C
303
static int vdprocess(Table_t* tab)
304
#else
305
static int vdprocess(tab)
306
Table_t* tab;
307
#endif
308
{
309
reg uchar *tar, *src, *endsrc, *endtar, *endt;
310
reg int hn, tn, n_src, n_tar;
311
312
/* check boundary conditions */
313
src = tab->src;
314
tar = tab->tar;
315
if((n_tar = tab->n_tar) <= 0)
316
return 0;
317
if((n_src = tab->n_src) <= 0)
318
return vdfold(tab,tar,tar+n_tar,1);
319
320
/* see if there is a large enough match at the start */
321
#define LARGE_MATCH (8*M_MIN)
322
endtar = tar + (n_tar < n_src ? n_tar : n_src);
323
for(; tar < endtar; ++tar, ++src)
324
if(*tar != *src)
325
break;
326
if((hn = tar - tab->tar) < LARGE_MATCH )
327
{ tar = tab->tar;
328
src = tab->src;
329
hn = 0;
330
}
331
else
332
{ if(vdputinst(tab,NIL(uchar*),tab->tar,0,hn) < 0)
333
return -1;
334
if(hn == n_tar)
335
return 0;
336
}
337
338
/* see if there is a large enough match at the end */
339
endtar = tab->tar + n_tar - 1;
340
endsrc = tab->src + n_src - 1;
341
endt = endtar - (n_tar < n_src ? n_tar : n_src);
342
if(endt < tar-1)
343
endt = tar-1;
344
for(; endtar > endt; --endtar, --endsrc)
345
if(*endtar != *endsrc)
346
break;
347
endtar += 1; endsrc += 1;
348
349
if((tn = (tab->tar+n_tar) - endtar) < LARGE_MATCH)
350
{ endtar = tab->tar+n_tar;
351
endsrc = tab->src+n_src;
352
tn = 0;
353
}
354
355
/* maintain big enough data for matching the remained */
356
if((endtar-tar) >= (n_tar/4))
357
{ src -= hn/4;
358
endsrc += tn/4;
359
}
360
if(vdfold(tab,src,endsrc,0) < 0)
361
return -1;
362
363
return vdfold(tab,tar,endtar,1);
364
}
365
366
367
#if __STD_C
368
long vddelta(Vddisc_t* source, Vddisc_t* target, Vddisc_t* delta)
369
#else
370
long vddelta(source, target, delta)
371
Vddisc_t* source; /* source data */
372
Vddisc_t* target; /* target data */
373
Vddisc_t* delta; /* transform output data */
374
#endif
375
{
376
reg int size, k, n;
377
reg long p, n_src, n_tar, window;
378
Table_t tab;
379
380
if(!target || (n_tar = target->size) < 0)
381
return -1;
382
if(n_tar > 0 && !target->data && !target->readf)
383
return -1;
384
385
if((n_src = source ? source->size : 0) < 0)
386
n_src = 0;
387
if(n_src > 0 && !source->data && !source->readf)
388
return -1;
389
390
if(!delta || (!delta->data && !delta->writef) )
391
return -1;
392
393
tab.n_src = tab.n_tar = tab.size = 0;
394
tab.tar = tab.src = NIL(uchar*);
395
tab.link = NIL(int*);
396
tab.hash = NIL(int*);
397
INIT(&tab.io,delta);
398
399
/* try to allocate working space */
400
window = DFLTWINDOW;
401
while(window > 0)
402
{ /* space for the target string */
403
if((long)(size = (int)window) > n_tar)
404
size = (int)n_tar;
405
if(!target->data && !(tab.tar = (uchar*)malloc(size*sizeof(uchar))) )
406
goto reduce_window;
407
k = size;
408
409
/* space for sliding header or source string */
410
if(n_src <= 0) /* compression only */
411
{ if((long)window >= n_tar)
412
{ size = 0;
413
k += n_tar;
414
}
415
else
416
{ size = (int)HEADER(window);
417
k += size;
418
}
419
}
420
else /* differencing */
421
{ if((long)(size = (int)window) > n_src)
422
size = (int)n_src;
423
k += size;
424
if(source->data)
425
size = 0;
426
}
427
if(size > 0 && !(tab.src = (uchar*)malloc(size*sizeof(uchar))) )
428
goto reduce_window;
429
430
/* space for the hash table itself */
431
n = k/2;
432
do (size = n); while((n &= n-1) != 0);
433
if(size < 64)
434
size = 64;
435
k += size;
436
437
if(!(tab.hash = (int*)malloc(k*sizeof(int))) )
438
goto reduce_window;
439
440
/* successful */
441
tab.link = tab.hash+size;
442
tab.size = size-1;
443
break;
444
445
reduce_window:
446
if(tab.tar)
447
{ free((Void_t*)tab.tar);
448
tab.tar = NIL(uchar*);
449
}
450
if(tab.src)
451
{ free((Void_t*)tab.src);
452
tab.src = NIL(uchar*);
453
}
454
if((window >>= 1) <= 0)
455
return -1;
456
}
457
458
/* amount processed */
459
n = 0;
460
461
/* output magic bytes and sizes */
462
k = sizeof(VD_MAGIC) - 1;
463
if((*_Vdwrite)(&tab.io,(uchar*)VD_MAGIC,k) != k ||
464
(*_Vdputu)(&tab.io,(ulong)n_tar) <= 0 ||
465
(*_Vdputu)(&tab.io,(ulong)n_src) <= 0 ||
466
(*_Vdputu)(&tab.io,(ulong)window) <= 0 )
467
goto done;
468
469
/* do one window at a time */
470
while(n < n_tar)
471
{ /* prepare the source string */
472
if(n_src <= 0) /* data compression */
473
{ if(n <= 0)
474
tab.n_src = 0;
475
else
476
{ size = (int)HEADER(window);
477
if(target->data)
478
tab.src = tab.tar + tab.n_tar - size;
479
else memcpy((Void_t*)tab.src,
480
(Void_t*)(tab.tar + tab.n_tar - size),
481
size );
482
tab.n_src = size;
483
}
484
}
485
else /* data differencing */
486
{ if(n < n_src)
487
{ if(window >= n_src)
488
p = 0;
489
else if((n+window) > n_src)
490
p = n_src-window;
491
else p = n;
492
if((size = n_src-p) > (int)window)
493
size = (int)window;
494
if(source->data)
495
tab.src = (uchar*)source->data + p;
496
else if((*source->readf)(tab.src,size,p,source) != size)
497
goto done;
498
tab.n_src = size;
499
}
500
/* else use last window */
501
}
502
503
/* prepare the target string */
504
if((size = (int)(n_tar-n)) > (int)window)
505
size = (int)window;
506
tab.n_tar = size;
507
if(target->data)
508
tab.tar = (uchar*)target->data + n;
509
else
510
{ size = (*target->readf)(tab.tar, size, (long)n, target);
511
if((long)size != tab.n_tar)
512
goto done;
513
}
514
515
/* reinitialize table before processing */
516
for(k = tab.size; k >= 0; --k)
517
tab.hash[k] = -1;
518
K_INIT(tab.quick,tab.recent,tab.rhere);
519
520
if(vdprocess(&tab) < 0)
521
goto done;
522
523
n += size;
524
}
525
526
done:
527
(void)(*_Vdflsbuf)(&tab.io);
528
529
if(!target->data && tab.tar)
530
free((Void_t*)tab.tar);
531
if(tab.src && ((n_src <= 0 && !target->data) || (n_src > 0 && !source->data) ) )
532
free((Void_t*)tab.src);
533
if(tab.hash)
534
free((Void_t*)tab.hash);
535
536
return tab.io.here + (tab.io.next - tab.io.data);
537
}
538
539