Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/vcperiod.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vchdr.h"
21
22
/* Compute the highest period in all quasi-cycles of a data set.
23
**
24
** Written by Kiem-Phong Vo
25
*/
26
27
#if __STD_C
28
ssize_t vcperiod(const Void_t* data, ssize_t dtsz)
29
#else
30
ssize_t vcperiod(data, dtsz)
31
Void_t* data;
32
ssize_t dtsz;
33
#endif
34
{
35
Vcchar_t *dt;
36
Vcsfxint_t *dist, *peak, *lcp;
37
Vcsfxint_t i, n, k, p, m, s, c, sz;
38
Vcsfx_t *sfx;
39
40
#define PEAKBOUND 3 /* bound to check for local peaks */
41
if(!data || (sz = (Vcsfxint_t)dtsz) <= PEAKBOUND*PEAKBOUND)
42
return -1;
43
44
/* compute suffix array and longest-common-prefix array */
45
if(!(lcp = (Vcsfxint_t*)calloc(1, sz*sizeof(Vcsfxint_t))) )
46
return -1;
47
if(!(sfx = vcsfxsort(data, (size_t)sz)) )
48
{ free(lcp);
49
return -1;
50
}
51
for(dt = (Vcchar_t*)data, p = 0, i = 0; i < sz; ++i)
52
{ if(sfx->inv[i] == 0)
53
continue;
54
k = sfx->idx[sfx->inv[i]-1];
55
while((i+p) < sz && (k+p) < sz && dt[i+p] == dt[k+p])
56
p += 1;
57
lcp[sfx->inv[i]] = p;
58
if(p > 0)
59
p -= 1;
60
}
61
62
/* dist[k] counts number of matches at distance k */
63
dist = sfx->inv; memset(dist, 0, sz*sizeof(Vcsfxint_t));
64
c = vclogi(sz); /* bound search to save time */
65
for(i = 0; i < sz; ++i)
66
{ for(m = 0, s = c, k = i+1; k < sz && s >= 0; ++k, --s)
67
{ if(lcp[k] == 0)
68
break;
69
if((m = sfx->idx[k] - sfx->idx[i]) > 0 ) /* match distance */
70
break;
71
}
72
for(n = 0, s = c, p = i-1; p >= 0 && s >= 0; --p, --s)
73
{ if(lcp[p] == 0)
74
break;
75
if((n = sfx->idx[p] - sfx->idx[i]) > 0 ) /* match distance */
76
break;
77
}
78
if(m > 0 && m <= n) /* count the closer one */
79
dist[m] += 1;
80
else if(n > 0 && n <= m)
81
dist[n] += 1;
82
}
83
84
/* compute an array of candidate peaks */
85
peak = sfx->idx; memset(peak, 0, sz*sizeof(Vcsfxint_t));
86
87
/* initialize running sum of distances of a suitable neighborhood */
88
for(m = 0, i = 0; i < 2*PEAKBOUND+1; ++i)
89
m += dist[i];
90
91
/* a peak is larger than the total sum of a small neighborhood around it */
92
for(n = sz - 2*PEAKBOUND, i = PEAKBOUND; i < n; ++i)
93
{ if(dist[i] > 2*(m - dist[i]) )
94
peak[i] = 1;
95
m = m - dist[i-PEAKBOUND] + dist[i+PEAKBOUND];
96
}
97
#ifdef DEBUG
98
for(i = 2; i < sz; ++i)
99
{ if(dist[i] <= 0)
100
continue;
101
DEBUG_PRINT(3,"%d: ",i); DEBUG_PRINT(3,"%d",dist[i]);
102
if(peak[i])
103
DEBUG_WRITE(3, "<p>", 3);
104
DEBUG_WRITE(3,"\n", 1);
105
}
106
#endif
107
108
/* a period is a peak with multiples that have matches */
109
p = 0; m = 0; c = vclogi(sz);
110
for(i = PEAKBOUND; 2*i < sz; ++i)
111
{ if(!peak[i] || !peak[2*i])
112
continue;
113
for(s = 1, n = dist[i], k = i+i; k < sz; k += i)
114
{ if(dist[k] == 0)
115
continue;
116
else if(dist[k] > 2*n ) /* k is a better period */
117
{ n = 0;
118
break;
119
}
120
121
n += dist[k]; /* sum the weights */
122
if(peak[k])
123
{ s += 1; /* count peaks */
124
peak[k] = 0;
125
}
126
}
127
128
if(s > c && n > m )
129
{ p = i; m = n; c = s; }
130
}
131
132
/**/DEBUG_PRINT(2, "dtsz=%d, ", sz); DEBUG_PRINT(2, "period=%d\n", p);
133
free(lcp);
134
free(sfx);
135
return (ssize_t)p;
136
}
137
138