Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libodelta/delta.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1989-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
#include "update.h"
22
#include "suftree.h"
23
24
/*
25
** Compute delta commands to transform the source string 'src'
26
** to the target string 'tar'. Small blockmoves are transformed
27
** to blockadds for space efficiency.
28
** Return -1 in case of error.
29
**
30
** For details on computing blockmoves, see:
31
** "The String-to-String Correction Problem with Block Moves"
32
** W. Tichy, ACM TOCS, v.2, No.4, 1984, pp.309-321.
33
**
34
** Written by Kiem-Phong Vo, 5/18/88
35
*/
36
37
#define M_MAX 9 /* max size of a block move instruction */
38
#define A_MAX 5 /* max size of the header of an add instruction */
39
40
/* structure of a delta instruction */
41
typedef struct _m_
42
{
43
int type; /* DELTA_MOVE or DELTA_ADD */
44
long size; /* size of the block being moved or added */
45
long addr; /* offset where the block starts */
46
struct _m_ *last; /* doubly linked list for easy insert/delete */
47
struct _m_ *next;
48
} Move;
49
50
/* bases of the source and target strings */
51
static char *Bsrc, *Btar;
52
53
/* Data buffer area */
54
static char *Ddata, *Dend, *Dnext;
55
static int Dfd;
56
57
#define delinit(buf,fd) (Ddata=Dnext=buf, Dend=buf+BUFSIZE, Dfd=fd)
58
#define delflush() (write(Dfd,Ddata,Dnext-Ddata) >= 0 ? (Dnext=Ddata,0) : -1)
59
60
static int delputc(int byte)
61
{
62
if(Dnext == Dend)
63
if(delflush() < 0)
64
return -1;
65
*Dnext++ = byte;
66
return 0;
67
}
68
69
static int delputl(register int n, register long v)
70
{
71
register int i;
72
unsigned char c[4];
73
74
for(i = 0; i < n; ++i)
75
{
76
c[i] = (unsigned char)(v%BASE);
77
v /= BASE;
78
}
79
for(i = n-1; i >= 0; --i)
80
if(delputc((char)c[i]) < 0)
81
return -1;
82
return 0;
83
}
84
85
static int delputs(register long n, register long addr)
86
{
87
if(n < (Dend-Dnext))
88
{
89
memcpy(Dnext,Btar+addr,n);
90
Dnext += n;
91
}
92
else
93
{
94
if(delflush() < 0)
95
return -1;
96
if(write(Dfd,Btar+addr,n) != n)
97
return -1;
98
}
99
return 0;
100
}
101
102
/* write an instruction */
103
static int putMove(Move* ip)
104
{
105
register char inst;
106
107
inst = ip->type;
108
inst |= (NBYTE(ip->size)&07) << 3;
109
if(ip->type == DELTA_MOVE)
110
{
111
inst |= NBYTE(ip->addr)&07;
112
if(delputc(inst) < 0 ||
113
delputl(NBYTE(ip->size),ip->size) < 0 ||
114
delputl(NBYTE(ip->addr),ip->addr) < 0)
115
return -1;
116
}
117
else
118
{
119
if(delputc(inst) < 0 ||
120
delputl(NBYTE(ip->size),ip->size) < 0 ||
121
delputs(ip->size,ip->addr) < 0)
122
return -1;
123
}
124
return 0;
125
}
126
127
/* constructor for Move */
128
static Move *newMove(int type, long size, long addr, Move* last)
129
{
130
register Move *ip = (Move*) malloc(sizeof(Move));
131
if(!ip) return 0;
132
ip->type = type;
133
ip->size = size;
134
ip->addr = addr;
135
if(last)
136
{
137
last->next = ip;
138
ip->last = last;
139
}
140
else ip->last = 0;
141
ip->next = 0;
142
return ip;
143
}
144
145
/* destructor for Move, return the elt after move */
146
static Move *delMove(Move* ip)
147
{
148
register Move *next = ip->next;
149
register Move *last = ip->last;
150
if(last)
151
last->next = next;
152
if(next)
153
next->last = last;
154
free(ip);
155
return next ? next : last;
156
}
157
158
/* make a new add command */
159
static Move *makeAdd(char* beg, char* end, Move* last)
160
{
161
register Move *ip;
162
163
ip = newMove(DELTA_ADD,(long)(end-beg),(long)(beg-Btar),NiL);
164
if(!ip)
165
return 0;
166
167
/* remove small previous adjacent moves */
168
while(last)
169
{
170
register int a_size, cost_m, cost_a;
171
172
if(last->type == DELTA_ADD)
173
break;
174
175
cost_m = NBYTE(last->size) + NBYTE(last->addr) +
176
NBYTE(ip->size) + ip->size;
177
a_size = ip->size + last->size;
178
cost_a = NBYTE(a_size) + a_size;
179
if(cost_m < cost_a)
180
break;
181
182
ip->size = a_size;
183
ip->addr -= last->size;
184
last = delMove(last);
185
}
186
187
/* merge with adjacent adds */
188
if(last && last->type == DELTA_ADD)
189
{
190
ip->size += last->size;
191
ip->addr -= last->size;
192
last = delMove(last);
193
}
194
195
if(last)
196
{
197
last->next = ip;
198
ip->last = last;
199
}
200
return ip;
201
}
202
203
/* check to see if a move is appropriate */
204
static int chkMove(long m_size, long m_addr, long a_size)
205
{
206
register int cost_m, cost_a;
207
208
cost_m = NBYTE(m_size) + NBYTE(m_addr);
209
cost_a = NBYTE(m_size) + m_size;
210
if(cost_m >= cost_a)
211
return 0;
212
213
/* it's good but it may be better to merge it to an add */
214
if(a_size > 0)
215
{
216
register int m_cost, a_cost;
217
218
m_cost = cost_m + NBYTE(a_size) + a_size;
219
a_size += m_size;
220
a_cost = NBYTE(a_size) + a_size;
221
222
/* it is better to merge! */
223
if(m_cost >= a_cost)
224
return 0;
225
}
226
227
return m_size;
228
}
229
230
/* optimize a sequence of moves */
231
static Move *optMove(register Move* s)
232
{
233
register long add, m_cost, a_cost;
234
register Move *ip, *last;
235
236
add = (s->last && s->last->type == DELTA_ADD) ? s->last->size : 0;
237
238
m_cost = 0;
239
a_cost = 0;
240
for(ip = s; ip; ip = ip->next)
241
{
242
register long cost_m, cost_a;
243
244
if(ip->type == DELTA_ADD || ip->size > (M_MAX+A_MAX))
245
break;
246
247
m_cost += 1+NBYTE(ip->size)+NBYTE(ip->addr);
248
a_cost += ip->size;
249
250
/* costs of alternatives */
251
cost_m = m_cost;
252
cost_a = a_cost;
253
if(add > 0)
254
{
255
cost_m += 1 + add + NBYTE(add);
256
cost_a += add;
257
}
258
if(ip->next && ip->next->type == DELTA_ADD)
259
{
260
cost_m += 1 + ip->next->size + NBYTE(ip->next->size);
261
cost_a += ip->next->size;
262
}
263
cost_a += 1 + NBYTE(cost_a);
264
265
/* conversion is bad */
266
if(cost_m < cost_a)
267
continue;
268
269
/* convert the entire sequence to an add */
270
s->type = DELTA_ADD;
271
while(ip != s)
272
{
273
last = ip->last;
274
s->size += ip->size;
275
delMove(ip);
276
ip = last;
277
}
278
279
/* merge adjacent adds */
280
if((last = s->last) && last->type == DELTA_ADD)
281
{
282
last->size += s->size;
283
delMove(s);
284
s = last;
285
}
286
if(s->next && s->next->type == DELTA_ADD)
287
{
288
s->size += s->next->size;
289
delMove(s->next);
290
}
291
/* done */
292
break;
293
}
294
return s;
295
}
296
297
/* the real thing */
298
int
299
delta(char* src, long n_src, char* tar, long n_tar, int delfd)
300
{
301
register char *sp, *tp, *esrc, *etar;
302
register long size, addr;
303
Suftree *tree;
304
Move *moves, *last;
305
char inst, buf[BUFSIZE];
306
307
/* initialize the output area */
308
delinit(buf,delfd);
309
310
/* output file sizes */
311
inst = DELTA_TYPE | ((NBYTE(n_src)&07) << 3) | (NBYTE(n_tar)&07);
312
if(delputc(inst) < 0)
313
return -1;
314
if(delputl(NBYTE(n_src),n_src) < 0 || delputl(NBYTE(n_tar),n_tar) < 0)
315
return -1;
316
317
/* bases */
318
Bsrc = src;
319
Btar = tar;
320
esrc = src + n_src - 1;
321
etar = tar + n_tar - 1;
322
323
/* initialize list and last block */
324
moves = 0;
325
last = 0;
326
327
/* try making suffix tree */
328
if(!(tree = n_tar > 0 ? bldsuftree(src,n_src) : (Suftree*)0))
329
{
330
/* not enough space for tree, remove matching prefix and suffix */
331
for(; src <= esrc && tar <= etar; ++src, ++tar)
332
if(*src != *tar)
333
break;
334
if((size = src-Bsrc) > 0)
335
{
336
register int cost_m, cost_a;
337
338
cost_m = NBYTE(size) + NBYTE(0);
339
cost_a = NBYTE(size) + size;
340
if(cost_m < cost_a)
341
{
342
moves = newMove(DELTA_MOVE,size,0L,NiL);
343
if(!moves)
344
return -1;
345
n_src -= src-Bsrc;
346
n_tar -= tar-Btar;
347
}
348
else
349
{
350
src = Bsrc;
351
tar = Btar;
352
}
353
}
354
355
for(sp = esrc, tp = etar; sp >= src && tp >= tar; --sp, --tp)
356
if(*sp != *tp)
357
break;
358
if((size = esrc-sp) > 0)
359
{
360
addr = sp+1-Bsrc;
361
if(chkMove(size,addr,0L) > 0)
362
{
363
last = newMove(DELTA_MOVE,size,addr,NiL);
364
if(!last)
365
return -1;
366
esrc = sp;
367
etar = tp;
368
n_tar = etar-tar+1;
369
n_src = esrc-src+1;
370
}
371
}
372
373
/* try making the tree again */
374
tree = n_tar > 0 ? bldsuftree(src,n_src) : (Suftree*)0;
375
}
376
377
/* compute block moves */
378
tp = 0;
379
while(n_tar > 0)
380
{
381
char *match;
382
383
if(tree)
384
size = mtchsuftree(tree,tar,n_tar,&match);
385
else size = mtchstring(src,n_src,tar,n_tar,&match);
386
if(size < 0)
387
return -1;
388
if(size > 0)
389
size = chkMove(size,(long)(match-Bsrc),(long)(tp ? tp-tar : 0));
390
391
/* output a block move */
392
if(size > 0)
393
{
394
if(tp)
395
{
396
moves = makeAdd(tp,tar,moves);
397
if(!moves)
398
return -1;
399
tp = 0;
400
}
401
moves = newMove(DELTA_MOVE,size,(long)(match-Bsrc),moves);
402
if(!moves)
403
return -1;
404
tar += size;
405
n_tar -= size;
406
}
407
else
408
{
409
if(!tp)
410
tp = tar;
411
tar += 1;
412
n_tar -= 1;
413
}
414
}
415
416
/* add any remaining blocks */
417
if(tp)
418
{
419
if(last && chkMove(last->size,last->addr,(long)(tar-tp)) <= 0)
420
{
421
tar += last->size;
422
last = delMove(last);
423
}
424
moves = makeAdd(tp,tar,moves);
425
if(!moves)
426
return -1;
427
}
428
if(last)
429
{
430
moves->next = last;
431
last->last = moves;
432
}
433
434
/* release space use for string matching */
435
if(tree)
436
delsuftree(tree);
437
else mtchstring(NiL,0L,NiL,0L,NiL);
438
439
/* optimize move instructions */
440
if(moves)
441
{
442
register Move *ip;
443
444
ip = moves;
445
while(ip->last)
446
ip = ip->last;
447
for(; ip; ip = ip->next)
448
if(ip->type == DELTA_MOVE && ip->size <= (M_MAX+A_MAX))
449
moves = ip = optMove(ip);
450
451
while(moves->last)
452
moves = moves->last;
453
}
454
455
/* write out the move instructions */
456
addr = 0L;
457
while(moves)
458
{
459
if(moves->type == DELTA_ADD)
460
moves->addr = addr;
461
addr += moves->size;
462
if(putMove(moves) < 0)
463
return -1;
464
moves = delMove(moves);
465
}
466
467
/* write ending token */
468
delputc((char)DELTA_TYPE);
469
470
/* flush buffer */
471
return delflush();
472
}
473
474