Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vcwindow/vcwfile.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vcwhdr.h"
21
22
23
/* Search for likely places in a stream where there may be a match.
24
**
25
** Written by Kiem-Phong Vo
26
*/
27
28
#define CSIZE (1<<12) /* number of hash table slots */
29
#define NVOTE(w) (4*w) /* # of votes for a position */
30
31
typedef struct _cand_s /* a candidate index */
32
{ int idx; /* actual index */
33
int vote; /* number of votes received */
34
} Cand_t;
35
36
/* distance between two signatures */
37
#define DIST(big,small) (big == 0 ? 0. : (big - small)/((double)big) )
38
39
/* the below criterion for accepting the closeness of two signatures is based
40
** on the magnitude of the signatures. It lessens the distortion from very
41
** large signatures which tend to make DIST() gives small distance.
42
*/
43
#define ACCEPT(sig) (sig > (1<<24) ? .05 : \
44
sig > (1<<20) ? .06 : \
45
sig > (1<<16) ? .07 : .08)
46
47
#if __STD_C
48
static int sigcmp(Void_t* one, Void_t* two, Void_t* disc)
49
#else
50
static int sigcmp(one, two, disc)
51
Void_t* one;
52
Void_t* two;
53
Void_t* disc;
54
#endif
55
{ Grint_t **o = (Grint_t**)one;
56
Grint_t **t = (Grint_t**)two;
57
58
return **o < **t ? -1 : **o == **t ? 0 : 1;
59
}
60
61
#if __STD_C
62
static int votecmp(Void_t* one, Void_t* two, Void_t* disc)
63
#else
64
static int votecmp(one, two, disc)
65
Void_t* one;
66
Void_t* two;
67
Void_t* disc;
68
#endif
69
{ Cand_t *o = (Cand_t*)one;
70
Cand_t *t = (Cand_t*)two;
71
72
return t->vote - o->vote;
73
}
74
75
#if __STD_C
76
static int idxcmp(Void_t* one, Void_t* two, Void_t* disc)
77
#else
78
static int sigcmp(one, two)
79
Void_t* one;
80
Void_t* two;
81
Void_t* disc;
82
#endif
83
{
84
return *((int*)one) - *((int*)two);
85
}
86
87
#if __STD_C
88
Vcwfile_t* vcwfopen(Sfio_t* f)
89
#else
90
Vcwfile_t* vcwfopen(f)
91
Sfio_t* f; /* file to match against */
92
#endif
93
{
94
Sfoff_t size;
95
Vcwfile_t *vcwf;
96
97
if(!f)
98
return NIL(Vcwfile_t*);
99
if(sfseek(f, (Sfoff_t)0, 1) < 0)
100
return NIL(Vcwfile_t*);
101
if((size = sfsize(f)) <= 0 )
102
return NIL(Vcwfile_t*);
103
104
if(!(vcwf = (Vcwfile_t*)malloc(sizeof(Vcwfile_t))) )
105
return NIL(Vcwfile_t*);
106
vcwf->file = f;
107
vcwf->size = size;
108
vcwf->done = 0;
109
vcwf->sig = NIL(Grint_t*);
110
vcwf->ssig = NIL(Grint_t**);
111
vcwf->nsig = 0;
112
vcwf->nidx = 0;
113
vcwf->nwork = 0;
114
vcwf->work = NIL(Grint_t*);
115
116
return vcwf;
117
}
118
119
#if __STD_C
120
void vcwfclose(Vcwfile_t* vcwf)
121
#else
122
void vcwfclose(vcwf)
123
Vcwfile_t* vcwf;
124
#endif
125
{
126
if(vcwf)
127
{ if(vcwf->sig)
128
free(vcwf->sig);
129
if(vcwf->ssig)
130
free(vcwf->ssig);
131
if(vcwf->work)
132
free(vcwf->work);
133
free(vcwf);
134
}
135
}
136
137
#if __STD_C
138
static int vcwfsig(Vcwfile_t* vcwf)
139
#else
140
static int vcwfsig(vcwf)
141
Vcwfile_t* vcwf;
142
#endif
143
{
144
int nsig;
145
Grint_t *sig, *esig, **ssig;
146
Vcchar_t *data;
147
Sfoff_t cpos;
148
149
vcwf->done = -1;
150
151
if((nsig = (int)(vcwf->size/((Sfoff_t)NG_SIZE))) <= 0)
152
return -1;
153
if(!(vcwf->sig = (Grint_t*)malloc(nsig*sizeof(Grint_t))) ||
154
!(vcwf->ssig = (Grint_t**)malloc(nsig*sizeof(Grint_t*))) )
155
return -1;
156
157
vcwf->nsig = nsig;
158
159
if((cpos = sfseek(vcwf->file, (Sfoff_t)0, 1)) < 0 ||
160
sfseek(vcwf->file,(Sfoff_t)0,0) != (Sfoff_t)0 )
161
return -1;
162
163
ssig = vcwf->ssig;
164
for(esig = (sig = vcwf->sig) + nsig; sig < esig; ++sig, ++ssig)
165
{ if(!(data = sfreserve(vcwf->file, NG_SIZE, 0)) )
166
return -1;
167
*sig = vcwngsig(data, NG_SIZE);
168
*ssig = sig;
169
}
170
if(sfseek(vcwf->file, cpos, 0) < 0)
171
return -1;
172
173
/* construct sorted list of signatures */
174
vcqsort(vcwf->ssig, nsig, sizeof(Grint_t*), sigcmp, 0);
175
176
vcwf->done = 1;
177
178
return 0;
179
}
180
181
182
#if __STD_C
183
static void vcwfvote(Vcwfile_t* vcwf, Cand_t* cand, Grint_t* msig, int v, int maxi)
184
#else
185
static void vcwfvote(vcwf, cand, m, v, maxi)
186
Vcwfile_t* vcwf; /* window matching handle */
187
Cand_t* cand; /* list of candidates */
188
Grint_t* msig; /* matching location to voter v below */
189
int v; /* position of voter in work[] */
190
int maxi; /* max index that can be matched */
191
#endif
192
{
193
int i, idx, vote;
194
Grint_t ws, ss, t;
195
196
if((idx = (msig - vcwf->sig) - v) < 0 || idx > maxi)
197
return;
198
199
/* vote from v itself */
200
vote = *msig == vcwf->work[v] ? 2 : 1;
201
202
/* count "contiguous block of neighbors" on left side */
203
for(i = v-1; i > 0; --i)
204
{ if((ws = vcwf->work[i]) == (ss = msig[i-v]) )
205
vote += 2;
206
else
207
{ if(ws < ss)
208
{ t = ws; ws = ss; ss = t; }
209
if(DIST(ws,ss) < ACCEPT(ss))
210
vote += 1;
211
else break;
212
}
213
}
214
215
/* count "contiguous block of neighbors" on right side */
216
for(i = v+1; i < vcwf->nwork; ++i)
217
{ if((ws = vcwf->work[i]) == (ss = msig[i-v]) )
218
vote += 2;
219
else
220
{ if(ws < ss)
221
{ t = ws; ws = ss; ss = t; }
222
if(DIST(ws,ss) < ACCEPT(ss))
223
vote += 1;
224
else break;
225
}
226
}
227
228
i = idx & (CSIZE-1);
229
if(cand[i].vote == 0) /* slot is empty, just insert index */
230
{ cand[i].idx = idx;
231
cand[i].vote = vote;
232
}
233
else if(cand[i].idx == idx) /* increase vote count */
234
cand[i].vote += vote;
235
else if(cand[i].vote < vote)
236
{ cand[i].idx = idx;
237
cand[i].vote = vote;
238
}
239
}
240
241
/* compute a sorted list of indices whose signatures are close to "sig" */
242
#if __STD_C
243
int vcwfsearch(Vcwfile_t* vcwf, Vcchar_t* data, size_t size )
244
#else
245
int vcwfsearch(vcwf, data, size)
246
Vcwfile_t* vcwf;
247
Vcchar_t* data; /* data to be matched */
248
size_t size; /* size of data */
249
#endif
250
{
251
Grint_t **ss, **es, **ms, workn;
252
int v, n, maxi;
253
int nwork;
254
Grint_t *work;
255
Cand_t cand[CSIZE];
256
257
if(vcwf->done < 0 || (vcwf->done == 0 && vcwfsig(vcwf) < 0) )
258
return -1;
259
260
/* if window is larger than file size, no match possible */
261
if(size >= vcwf->size || size <= NG_SIZE)
262
return (vcwf->nidx = 0);
263
264
if((nwork = size/NG_SIZE) > vcwf->nwork)
265
{ if(vcwf->work)
266
free(vcwf->work);
267
if(!(vcwf->work = (Grint_t*)malloc(nwork*sizeof(Grint_t))) )
268
return -1;
269
}
270
271
vcwf->nwork = nwork;
272
work = vcwf->work;
273
274
for(n = 0; n < nwork; ++n)
275
work[n] = vcwngsig(data + n*NG_SIZE, NG_SIZE);
276
277
/* clear candidate hash table */
278
for(n = 0; n < CSIZE; ++n)
279
cand[n].vote = 0;
280
281
maxi = vcwf->nsig - nwork; /* max location that can be matched */
282
283
for(n = 0; n < nwork; n += 1)
284
{ /* find the closest one to the search value */
285
workn = work[n];
286
for(es = (ss = vcwf->ssig) + vcwf->nsig; (es-ss) > 1; )
287
{ ms = ss + (es - ss)/2;
288
if(**ms == workn)
289
ss = es = ms;
290
else if(**ms > workn)
291
es = ms-1;
292
else ss = ms+1;
293
}
294
295
if(ss >= (es = vcwf->ssig + vcwf->nsig) )
296
ss = es-1;
297
while(ss >= vcwf->ssig && **ss > workn)
298
ss -= 1;
299
while((ms = ss+1) < es && **ms <= workn)
300
ss = ms;
301
302
v = NVOTE(nwork);
303
while(v > 0 && (ss >= vcwf->ssig || ms < es) )
304
{ if(ss >= vcwf->ssig)
305
{ if(DIST(workn,**ss) < ACCEPT(**ss) )
306
{ vcwfvote(vcwf, cand, *ss, n, maxi);
307
v -= 1; ss -= 1;
308
}
309
else ss = vcwf->ssig-1;
310
}
311
if(ms < es)
312
{ if(DIST(**ms,workn) < ACCEPT(workn) )
313
{ vcwfvote(vcwf, cand, *ms, n, maxi);
314
v -= 1; ms += 1;
315
}
316
else ms = es;
317
}
318
}
319
}
320
321
#define NIDX(vcwf) (sizeof(vcwf->idx)/sizeof(vcwf->idx[0]))
322
vcqsort(cand, CSIZE, sizeof(Cand_t), votecmp, 0);
323
for(n = 0; n < NIDX(vcwf); ++n)
324
{ if(cand[n].vote <= 0)
325
break;
326
vcwf->idx[n] = cand[n].idx;
327
}
328
329
if((vcwf->nidx = n) > 1)
330
vcqsort(vcwf->idx, n, sizeof(int), idxcmp, 0);
331
return vcwf->nidx;
332
}
333
334