Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vcmisc/vcbwt.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include <vclib.h>
21
22
/* Burrows-Wheeler transform.
23
**
24
** Written by Kiem-Phong Vo ([email protected])
25
*/
26
27
#if __STD_C
28
static ssize_t vcbwt(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
29
#else
30
static ssize_t vcbwt(vc, data, size, out)
31
Vcodex_t* vc;
32
Void_t* data;
33
size_t size;
34
Void_t** out;
35
#endif
36
{
37
Vcsfx_t *sfx;
38
ssize_t hd, sp, sz, *idx, *endidx;
39
Vcio_t io;
40
Vcchar_t *dt, *output, *bw;
41
ssize_t rv = -1;
42
43
if(size == 0)
44
return 0;
45
46
/* compute suffix array */
47
if(!(sfx = vcsfxsort(data,size)) )
48
RETURN(-1);
49
50
/* compute the location of the sentinel */
51
for(endidx = (idx = sfx->idx)+size; idx < endidx; ++idx)
52
if(*idx == 0)
53
break;
54
sp = idx - sfx->idx;
55
56
hd = vcsizeu(sp); /* encoding size of the 0th position after sorting */
57
sz = size + 1; /* size of transformed data */
58
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), sz, hd)) )
59
goto done;
60
61
/* compute the transform */
62
dt = (Vcchar_t*)data; bw = output;
63
for(idx = sfx->idx; idx < endidx; ++idx, ++bw)
64
{ if(*idx == 0) /* special coding of the 0th position */
65
*bw = idx == sfx->idx ? 0 : *(bw-1);
66
else *bw = dt[*idx - 1];
67
}
68
*bw = dt[size-1];
69
70
/* filter data thru the continuation coder */
71
dt = output;
72
if(vcrecode(vc, &output, &sz, hd, 0) < 0 )
73
goto done;
74
if(dt != output) /* got a new buffer, free old one */
75
vcbuffer(vc, dt, -1, -1);
76
77
/* write out the sorted location of position-0 in data */
78
output -= hd;
79
vcioinit(&io, output, hd);
80
vcioputu(&io, sp);
81
rv = hd+sz;
82
83
/* truncate buffer to size */
84
if(!(output = vcbuffer(vc, output, rv, -1)) )
85
RETURN(-1);
86
if(out)
87
*out = output;
88
done: free(sfx);
89
return rv;
90
}
91
92
#if __STD_C
93
static ssize_t vcunbwt(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
94
#else
95
static ssize_t vcunbwt(vc, data, size, out)
96
Vcodex_t* vc;
97
Void_t* data;
98
size_t size;
99
Void_t** out;
100
#endif
101
{
102
ssize_t n, sp, sz;
103
Vcchar_t *dt, *output, *o;
104
ssize_t base[256], *offset;
105
Vcio_t io;
106
107
if(size == 0)
108
return 0;
109
110
vcioinit(&io, data, size);
111
112
if((sp = vciogetu(&io)) < 0) /* index of 0th position */
113
RETURN(-1);
114
115
/* retrieve transformed data */
116
sz = vciomore(&io);
117
dt = vcionext(&io);
118
119
/* invert continuation coder if there was one */
120
if(vcrecode(vc, &dt, &sz, 0, 0) < 0 )
121
RETURN(-1);
122
sz -= 1; /* actual data size */
123
124
if(sp >= sz) /* corrupted data */
125
RETURN(-1);
126
127
/* get space to decode */
128
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), sz, 0)) )
129
RETURN(-1);
130
131
if(!(offset = (ssize_t*)malloc((sz+1)*sizeof(ssize_t))) )
132
RETURN(-1);
133
134
/* base and offset vector for bytes */
135
for(n = 0; n < 256; ++n)
136
base[n] = 0;
137
for(n = 0; n <= sz; ++n)
138
{ if(n == sp)
139
offset[n] = 0;
140
else
141
{ offset[n] = base[dt[n]];
142
base[dt[n]] += 1;
143
}
144
}
145
for(sp = 0, n = 0; n < 256; ++n)
146
{ ssize_t c = base[n];
147
base[n] = sp;
148
sp += c;
149
}
150
151
/* now invert the transform */
152
for(n = sz, o = output+sz-1; o >= output; --o)
153
{ *o = dt[n];
154
n = base[*o] + offset[n];
155
}
156
157
free(offset);
158
vcbuffer(vc, dt, -1, -1);
159
160
if(out)
161
*out = output;
162
return sz;
163
}
164
165
166
Vcmethod_t _Vcbwt =
167
{ vcbwt,
168
vcunbwt,
169
0,
170
"bwt", "Burrows-Wheeler transform.",
171
"[-version?bwt (AT&T Research) 2003-01-01]" USAGE_LICENSE,
172
NIL(Vcmtarg_t*),
173
4*1024*1024,
174
0
175
};
176
177
VCLIB(Vcbwt)
178
179