Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vchuff/vchuffpart.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vchhdr.h"
21
22
/* Recursively partitioning a data string to improve Huffman coding
23
**
24
** Written by Binh Dao Vo and Kiem-Phong Vo
25
*/
26
27
#define PT_NBIT (3.00) /* est. # of bits per code for table */
28
#define PT_NONE (1.00) /* est. # of bits per nonappearing byte */
29
#define PT_MIN (512) /* minimum part size for part() */
30
#define PT_DIV (3) /* factor for subdividing a part */
31
#define PTMINSZ(sz) ((sz/PT_DIV) < PT_MIN ? PT_MIN : (sz/PT_DIV) )
32
33
typedef struct _part_s
34
{ Vcodex_t* vch; /* Huffman coder handle */
35
size_t ctab; /* est. cost to encode a Huffman table */
36
Vcio_t* io; /* where to write out compressed data */
37
} Part_t;
38
39
/* compute the byte frequencies and encoding cost of a string */
40
#if __STD_C
41
static double entropy(ssize_t* freq, Vcchar_t* data, size_t size)
42
#else
43
static double entropy(freq, data, size)
44
ssize_t* freq;
45
Vcchar_t* data;
46
size_t size;
47
#endif
48
{
49
double e;
50
int i;
51
52
CLRTABLE(freq, VCH_SIZE);
53
ADDFREQ(freq, Vcchar_t*, data, size);
54
55
for(e = 0., i = 0; i < VCH_SIZE; ++i)
56
e += freq[i]*vclog(freq[i]);
57
58
return e;
59
}
60
61
/* update a frequency array given another one adding/robbing it */
62
#if __STD_C
63
static double eupdate(double e, ssize_t* freq, ssize_t* fr, int add)
64
#else
65
static double eupdate(e, freq, fr, add)
66
double e; /* current entropy value */
67
ssize_t* freq; /* current frequency array */
68
ssize_t* fr; /* the one adding/robbing it */
69
int add; /* 1/0 for adding/subtracting */
70
#endif
71
{
72
int b;
73
74
for(b = 0; b < VCH_SIZE; ++b)
75
{ if(fr[b] == 0)
76
continue;
77
e -= freq[b]*vclog(freq[b]);
78
if(add)
79
freq[b] += fr[b];
80
else freq[b] -= fr[b];
81
e += freq[b]*vclog(freq[b]);
82
}
83
84
return e;
85
}
86
87
#if __STD_C
88
static int addpart(Part_t* pt, Void_t* data, size_t size, ssize_t* freq)
89
#else
90
static int addpart(pt, data, size, freq)
91
Part_t* pt;
92
Void_t* data; /* data to compress */
93
size_t size; /* size of data */
94
ssize_t* freq; /* frequency of bytes */
95
#endif
96
{
97
Vcchar_t *buf;
98
ssize_t sz;
99
100
vchcopy(pt->vch, freq, NIL(ssize_t*), 0);
101
if((sz = vcapply(pt->vch, data, size, &buf)) <= 0)
102
return -1;
103
vcioputu(pt->io, sz);
104
vcioputs(pt->io, buf, sz);
105
return 0;
106
}
107
108
/* recursively partition a dataset into parts with different statistical models */
109
#if __STD_C
110
static int part(Part_t* pt, Vcchar_t* data, size_t size, ssize_t* rfreq, double re)
111
#else
112
static int part(pt, data, size, pos, rfreq, re)
113
Part_t* pt; /* partition handle */
114
Vcchar_t* data; /* dataset to be partitioned */
115
size_t size; /* size of data */
116
ssize_t* rfreq; /* frequencies of all bytes */
117
double re; /* entropy of this dataset */
118
#endif
119
{
120
double le, lc, rc, cost;
121
ssize_t b, lfreq[VCH_SIZE];
122
ssize_t p, endp, minsz;
123
124
/* cost of coding the entire data set */
125
cost = pt->ctab + size*vclog(size) - re;
126
127
/* see if large enough to split */
128
minsz = PTMINSZ(size);
129
if(size < 2*minsz)
130
return addpart(pt, data, size, rfreq);
131
132
/* first partition is at minsz */
133
le = entropy(lfreq, data, minsz);
134
re = eupdate(re, rfreq, lfreq, 0);
135
136
/* loop adding 1 to left part and subtracting 1 from right part */
137
for(p = minsz, endp = size-minsz; p < endp; )
138
{ b = data[p++];
139
140
/* update the costs of left&right parts */
141
le -= lfreq[b]*vclog(lfreq[b]);
142
re -= rfreq[b]*vclog(rfreq[b]);
143
lfreq[b] += 1; rfreq[b] -= 1;
144
le += lfreq[b]*vclog(lfreq[b]);
145
re += rfreq[b]*vclog(rfreq[b]);
146
147
lc = pt->ctab + p*vclog(p) - le;
148
rc = pt->ctab + (size-p)*vclog(size-p) - re;
149
if((lc + rc) < cost)
150
{ /* good partition, recurse to do the two parts */
151
if(part(pt, data, p, lfreq, le) < 0 )
152
return -1;
153
if(part(pt, data+p, size-p, rfreq, re) < 0)
154
return -1;
155
return 0;
156
}
157
}
158
159
eupdate(re, rfreq, lfreq, 1); /* retotal the frequencies */
160
return addpart(pt, data, size, rfreq);
161
}
162
163
#if __STD_C
164
static ssize_t pthuff(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
165
#else
166
static ssize_t pthuff(vc, data, size, out)
167
Vcodex_t* vc; /* Vcodex handle */
168
Void_t* data; /* target data to be compressed */
169
size_t size; /* data size */
170
Void_t** out; /* to return output buffer */
171
#endif
172
{
173
ssize_t n, c, freq[VCH_SIZE];
174
double e;
175
Vcchar_t *output, *dt;
176
Vcio_t io;
177
Part_t *pt = vcgetmtdata(vc, Part_t*);
178
179
if(size == 0)
180
return 0;
181
182
/* estimate table entropy */
183
e = entropy(freq, (Vcchar_t*)data, size);
184
185
/* estimate the cost of emitting a Huffman table */
186
for(c = 0, n = 0; n < VCH_SIZE; ++n)
187
if(freq[n])
188
c += 1;
189
pt->ctab = c*PT_NBIT + (VCH_SIZE-c)*PT_NONE;
190
191
/* get buffer space to write data */
192
n = size + VCH_SIZE;
193
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), n, 0)) )
194
return -1;
195
vcioinit(&io, output, n);
196
pt->io = &io;
197
198
vcioputu(&io, size); /* write out the original size */
199
200
/* recursively partition into smaller segments and compress */
201
if(part(pt, (Vcchar_t*)data, size, freq, e) < 0)
202
return -1;
203
204
pt->io = NIL(Vcio_t*);
205
206
dt = output;
207
n = vciosize(&io); /* compressed data size */
208
if(vcrecode(vc, &output, &n, 0, 0) < 0)
209
return -1;
210
if(dt != output) /* free unused buffers */
211
vcbuffer(vc, dt, -1, -1);
212
vcbuffer(pt->vch, NIL(Vcchar_t*), -1, -1);
213
214
if(out)
215
*out = output;
216
217
return n;
218
}
219
220
#if __STD_C
221
static ssize_t ptunhuff(Vcodex_t* vc, const Void_t* orig, size_t size, Void_t** out)
222
#else
223
static ssize_t ptunhuff(vc, orig, size, out)
224
Vcodex_t* vc; /* Vcodex handle */
225
Void_t* orig; /* target data to be matched */
226
size_t size; /* data size */
227
Void_t** out; /* to return output buffer */
228
#endif
229
{
230
Vcchar_t *output, *dt, *data;
231
ssize_t sz, os, s, n;
232
Vcio_t rd, wr;
233
Part_t *pt = vcgetmtdata(vc, Part_t*);
234
235
if(size == 0)
236
return 0;
237
238
data = (Vcchar_t*)orig; sz = (ssize_t)size;
239
if(vcrecode(vc, &data, &sz, 0, 0) < 0 )
240
return -1;
241
size = sz;
242
243
/* get the size of uncompressed data */
244
vcioinit(&rd, data, size);
245
if((sz = vciogetu(&rd)) <= 0)
246
return -1;
247
248
/* set up buffer */
249
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), sz, 0)) )
250
return -1;
251
vcioinit(&wr, output, sz);
252
253
for(os = 0; os < sz; )
254
{ /* get the size and compressed data of this segment */
255
if((n = vciogetu(&rd)) <= 0)
256
return -1;
257
dt = vcionext(&rd);
258
vcioskip(&rd, n);
259
260
/* decompress the data */
261
if((s = vcapply(pt->vch, dt, n, &dt)) <= 0)
262
return -1;
263
264
/* write out data */
265
if((os += s) > sz)
266
return -1;
267
vcioputs(&wr, dt, s);
268
}
269
270
/* free unused buffers */
271
if(data != orig)
272
vcbuffer(vc, data, -1, -1);
273
vcbuffer(pt->vch, NIL(Vcchar_t*), -1, -1);
274
275
sz = vciosize(&wr);
276
277
if(out)
278
*out = output;
279
return sz;
280
}
281
282
/* Event handler */
283
#if __STD_C
284
static int ptevent(Vcodex_t* vc, int type, Void_t* params)
285
#else
286
static int ptevent(vc, type, params)
287
Vcodex_t* vc;
288
int type;
289
Void_t* params;
290
#endif
291
{
292
Part_t *pt;
293
294
if(type == VC_OPENING)
295
{ if(!(pt = (Part_t*)malloc(sizeof(Part_t))) )
296
return -1;
297
298
/* open the entropy coder handle */
299
if(!(pt->vch = vcopen(NIL(Vcdisc_t*), Vchuffman, 0, 0, vc->flags)) )
300
{ free(pt);
301
return -1;
302
}
303
304
pt->ctab = 0;
305
pt->io = NIL(Vcio_t*);
306
307
vcsetmtdata(vc, pt);
308
return 0;
309
}
310
else if(type == VC_CLOSING)
311
{ if((pt = vcgetmtdata(vc, Part_t*)) )
312
{ if(pt->vch)
313
vcclose(pt->vch);
314
free(pt);
315
}
316
vcsetmtdata(vc, NIL(Part_t*));
317
return 0;
318
}
319
else if(type == VC_FREEBUFFER)
320
{ if((pt = vcgetmtdata(vc, Part_t*)) && pt->vch)
321
vcbuffer(pt->vch, NIL(Vcchar_t*), -1, -1);
322
return 0;
323
}
324
else return 0;
325
}
326
327
Vcmethod_t _Vchuffpart =
328
{ pthuff,
329
ptunhuff,
330
ptevent,
331
"huffpart", "Huffman encoding by partitioning.",
332
"[-version?huffpart (AT&T Research) 2003-01-01]" USAGE_LICENSE,
333
NIL(Vcmtarg_t*),
334
1024*1024,
335
0
336
};
337
338
VCLIB(Vchuffpart)
339
340