Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vchuff/vchuffgroup.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include "vchhdr.h"
21
22
/* Group segments of data for more effective Huffman coding.
23
**
24
** Written by Binh Dao Vo and Kiem-Phong Vo
25
*/
26
27
#define GRP_NTBL 32 /* max number of tables */
28
#define GRP_IMAX 20 /* max initial number of tables */
29
#define GRP_IMIN 4 /* min initial number of tables */
30
#define GRP_ITER 3 /* desired number of iterations */
31
32
#define GRP_HUGE ((ssize_t)((~((size_t)0)) >> 1) )
33
34
typedef struct _table_s
35
{ ssize_t size[VCH_SIZE]; /* code length table */
36
int maxs; /* max code length */
37
int runb; /* the run object if any */
38
ssize_t nblks; /* # of associated blocks */
39
ssize_t cost; /* cost of encoding */
40
} Table_t;
41
42
typedef struct _obj_s
43
{ Vchobj_t obj; /* object */
44
Vcchar_t size; /* Huffman code size in part */
45
short freq; /* frequency */
46
} Obj_t;
47
48
typedef struct _group_s
49
{ Vcodex_t* huf; /* Huffman coder/decoder */
50
Vcodex_t* mtf; /* MTF coder/decoder */
51
52
ssize_t ptsz; /* actual part size */
53
ssize_t npts; /* number of parts */
54
Vcchar_t* part; /* table index for each part */
55
Vcchar_t* work; /* working space for indices */
56
ssize_t* ppos; /* position of part[] in objs[] */
57
ssize_t* sort; /* for sorting part positions */
58
59
Obj_t* obj; /* objs and their frequencies */
60
61
ssize_t hufsz; /* size of single Huffman code */
62
ssize_t cmpsz; /* current best compressed size */
63
ssize_t ntbl; /* number of coding tables */
64
Table_t tbl[GRP_NTBL]; /* best coding tables */
65
} Group_t;
66
67
#undef GRPfreq
68
#undef GRPFREQ
69
#undef GRPsize
70
#undef GRPSIZE
71
72
/* sum frequencies for distinct objects */
73
#define GRPfreq(fr,oo,kk) \
74
switch(kk) \
75
{ case 16: fr[oo->obj] += oo->freq; oo++; case 15: fr[oo->obj] += oo->freq; oo++; \
76
case 14: fr[oo->obj] += oo->freq; oo++; case 13: fr[oo->obj] += oo->freq; oo++; \
77
case 12: fr[oo->obj] += oo->freq; oo++; case 11: fr[oo->obj] += oo->freq; oo++; \
78
case 10: fr[oo->obj] += oo->freq; oo++; case 9: fr[oo->obj] += oo->freq; oo++; \
79
case 8: fr[oo->obj] += oo->freq; oo++; case 7: fr[oo->obj] += oo->freq; oo++; \
80
case 6: fr[oo->obj] += oo->freq; oo++; case 5: fr[oo->obj] += oo->freq; oo++; \
81
case 4: fr[oo->obj] += oo->freq; oo++; case 3: fr[oo->obj] += oo->freq; oo++; \
82
case 2: fr[oo->obj] += oo->freq; oo++; case 1: fr[oo->obj] += oo->freq; oo++; \
83
}
84
#define GRPFREQ(fr,o,n) \
85
do { Obj_t* oo = (Obj_t*)(o); \
86
ssize_t nn = (ssize_t)(n); \
87
for(; nn > 0; nn -= VCH_SW) GRPfreq(fr, oo, nn >= VCH_SW ? VCH_SW : nn); \
88
} while(0)
89
90
/* accumulate coding size of all objects */
91
#define GRPsize(v,sz,oo,kk) \
92
switch(kk) \
93
{ case 16: v += sz[oo->obj] * oo->freq; oo++; case 15: v += sz[oo->obj] * oo->freq; oo++; \
94
case 14: v += sz[oo->obj] * oo->freq; oo++; case 13: v += sz[oo->obj] * oo->freq; oo++; \
95
case 12: v += sz[oo->obj] * oo->freq; oo++; case 11: v += sz[oo->obj] * oo->freq; oo++; \
96
case 10: v += sz[oo->obj] * oo->freq; oo++; case 9: v += sz[oo->obj] * oo->freq; oo++; \
97
case 8: v += sz[oo->obj] * oo->freq; oo++; case 7: v += sz[oo->obj] * oo->freq; oo++; \
98
case 6: v += sz[oo->obj] * oo->freq; oo++; case 5: v += sz[oo->obj] * oo->freq; oo++; \
99
case 4: v += sz[oo->obj] * oo->freq; oo++; case 3: v += sz[oo->obj] * oo->freq; oo++; \
100
case 2: v += sz[oo->obj] * oo->freq; oo++; case 1: v += sz[oo->obj] * oo->freq; oo++; \
101
}
102
#define GRPSIZE(v,sz,o,n) \
103
do { Obj_t* oo = (Obj_t*)(o); \
104
ssize_t nn = (ssize_t)(n); v = 0; \
105
for(; nn > 0; nn -= VCH_SW) GRPsize(v, sz, oo, nn >= VCH_SW ? VCH_SW : nn); \
106
} while(0)
107
108
109
#if __STD_C
110
static int objcmp(Void_t* one, Void_t* two, Void_t* hdl)
111
#else
112
static int objcmp(one, two, hdl)
113
Void_t* one;
114
Void_t* two;
115
Void_t* hdl;
116
#endif
117
{
118
int d;
119
Obj_t *o1 = (Obj_t*)one;
120
Obj_t *o2 = (Obj_t*)two;
121
122
if((d = o1->size - o2->size) != 0)
123
return d;
124
else return (int)o1->obj - (int)o2->obj;
125
}
126
127
/* Construct a list of distinct objects and frequencies from data[]. This list
128
** is used for fast computation of total frequencies, encoding lengths, etc.
129
** It is good for data transformed by a Burrows-Wheeler transform since the
130
** distribution is skewed toward a few small values.
131
*/
132
#if __STD_C
133
static int grpinit(Group_t* grp, Vchobj_t* data, size_t dtsz, ssize_t ptsz)
134
#else
135
static int grpinit(grp, data, dtsz, ptsz)
136
Group_t* grp;
137
Vchobj_t* data;
138
size_t dtsz;
139
ssize_t ptsz;
140
#endif
141
{
142
ssize_t freq[VCH_SIZE], size[VCH_SIZE];
143
ssize_t i, k, d, p, npts;
144
145
if(ptsz >= (ssize_t)dtsz )
146
ptsz = (ssize_t)dtsz;
147
grp->ptsz = ptsz;
148
grp->npts = npts = (dtsz+ptsz-1)/ptsz; /* guaranteed >= 1 */
149
grp->cmpsz = (dtsz < VCH_SIZE ? VCH_SIZE : dtsz)*VC_BITSIZE; /* starting cost */
150
grp->ntbl = 0;
151
152
if(grp->part)
153
free(grp->part);
154
if(!(grp->part = (Vcchar_t*)calloc(2*npts, sizeof(Vcchar_t))) )
155
return -1;
156
grp->work = grp->part + npts;
157
158
if(grp->ppos)
159
free(grp->ppos);
160
if(!(grp->ppos = (ssize_t*)calloc((2*npts+1), sizeof(ssize_t))) )
161
return -1;
162
grp->sort = grp->ppos + npts+1;
163
164
if(grp->obj)
165
free(grp->obj);
166
if(!(grp->obj = (Obj_t*)calloc(dtsz, sizeof(Obj_t))) )
167
return -1;
168
169
/* ptsz is such that a object frequency should fit in a byte */
170
for(d = 0, p = 0, i = 0; i < npts; i += 1, d += ptsz)
171
{ grp->ppos[i] = p;
172
173
CLRTABLE(freq,VCH_SIZE);
174
ADDFREQ(freq, Vchobj_t*, data+d, i == npts-1 ? dtsz-d : ptsz);
175
CLRTABLE(size,VCH_SIZE);
176
vchsize(VCH_SIZE, freq, size, 0);
177
for(k = 0; k < VCH_SIZE; ++k)
178
{ if(freq[k] != 0)
179
{ grp->obj[p].obj = (Vchobj_t)k;
180
grp->obj[p].size = (Vcchar_t)size[k];
181
grp->obj[p].freq = (short)freq[k];
182
p += 1;
183
}
184
}
185
186
/* sort by code lengths */
187
vcqsort(grp->obj+grp->ppos[i], p-grp->ppos[i], sizeof(Obj_t), objcmp, 0);
188
}
189
grp->ppos[npts] = p;
190
191
return 0;
192
}
193
194
/* sorting parts by heaviest elements */
195
#if __STD_C
196
static int partcmp(Void_t* one, Void_t* two, Void_t* disc)
197
#else
198
static int partcmp(one, two, disc)
199
Void_t* one;
200
Void_t* two;
201
Void_t* disc;
202
#endif
203
{
204
int p, q, n, m, d;
205
Group_t *grp = (Group_t*)disc;
206
207
p = *((size_t*)one); m = grp->ppos[p+1] - grp->ppos[p]; p = grp->ppos[p];
208
q = *((size_t*)two); n = grp->ppos[q+1] - grp->ppos[q]; q = grp->ppos[q];
209
if(m > n)
210
m = n;
211
if(m > 8)
212
m = 8;
213
214
for(n = 0; n < m; ++n )
215
if((d = (int)grp->obj[p+n].obj - (int)grp->obj[q+n].obj) != 0 )
216
return d;
217
218
for(n = 0; n < m; ++n )
219
if((d = (int)grp->obj[p+n].freq - (int)grp->obj[q+n].freq) != 0 )
220
return d;
221
222
return p-q;
223
}
224
225
/* compute an optimal clustering with ntbl clusters */
226
#if __STD_C
227
static void grppart(Group_t* grp, ssize_t ntbl, int niter)
228
#else
229
static void grppart(grp, ntbl, niter)
230
Group_t* grp;
231
ssize_t ntbl; /* # of tables aiming for */
232
int niter; /* # of iterations to run */
233
#endif
234
{
235
ssize_t i, k, p, q, z, n, t, iter;
236
Vcchar_t *dt, tmp[VCH_SIZE];
237
Table_t tbl[GRP_NTBL];
238
int map[GRP_NTBL];
239
ssize_t freq[GRP_NTBL][VCH_SIZE], pfr[VCH_SIZE], psz[VCH_SIZE], *fr, *sz;
240
Vcchar_t *part = grp->part, *work = grp->work;
241
ssize_t npts = grp->npts, *ppos = grp->ppos, *sort = grp->sort;
242
Obj_t *obj = grp->obj;
243
244
if(ntbl > npts)
245
ntbl = npts;
246
if(ntbl > GRP_NTBL)
247
ntbl = GRP_NTBL;
248
249
if(grp->ntbl <= 0 || ntbl < grp->ntbl) /* making new tables */
250
{ /* sort parts so that similar prefixes group together */
251
for(k = 0; k < npts; ++k)
252
sort[k] = k;
253
vcqsort(sort, npts, sizeof(ssize_t), partcmp, grp);
254
255
/* now make tables */
256
for(z = npts/ntbl, p = 0, t = 0; t < ntbl; t += 1)
257
{ fr = freq[t]; CLRTABLE(fr, VCH_SIZE);
258
259
for(n = p+z > npts ? (npts-p) : z; n > 0; --n, ++p)
260
{ k = sort[p];
261
GRPFREQ(fr, obj+ppos[k], ppos[k+1]-ppos[k]);
262
}
263
264
tbl[t].maxs = vchsize(VCH_SIZE, fr, tbl[t].size, &tbl[t].runb);
265
tbl[t].nblks = 0;
266
tbl[t].cost = 0;
267
}
268
}
269
else /* increasing number of tables */
270
{ /**/DEBUG_ASSERT(ntbl <= GRP_NTBL && grp->ntbl <= GRP_NTBL);
271
memcpy(tbl,grp->tbl,grp->ntbl*sizeof(Table_t));
272
n = ntbl - grp->ntbl; ntbl = grp->ntbl;
273
for(; n > 0; --n)
274
{ for(z = 0, p = -1, i = 0; i < grp->ntbl; ++i)
275
{ if(tbl[i].cost <= z)
276
continue;
277
z = tbl[p = i].cost;
278
}
279
if(p < 0) /* if p >= 0, it's the highest cost table */
280
break;
281
282
/* split blocks of table p into two tables, p and q */
283
q = ntbl; ntbl += 1;
284
z = tbl[p].nblks/2 - 1; fr = freq[p]; CLRTABLE(fr, VCH_SIZE);
285
for(i = 0; i < npts; ++i)
286
{ if(work[i] != p)
287
continue;
288
GRPFREQ(fr, obj+ppos[i], ppos[i+1]-ppos[i]);
289
if((z -= 1) == 0) /* start 2nd table */
290
{ fr = freq[q]; CLRTABLE(fr, VCH_SIZE); }
291
}
292
293
/* make sure neither table will considered for further splitting */
294
tbl[p].maxs = vchsize(VCH_SIZE, freq[p], tbl[p].size, &tbl[p].runb);
295
tbl[q].maxs = vchsize(VCH_SIZE, freq[q], tbl[q].size, &tbl[q].runb);
296
tbl[p].cost = tbl[q].cost = 0;
297
tbl[p].nblks = tbl[q].nblks = 0;
298
}
299
}
300
301
/**/DEBUG_PRINT(2,"\tgrppart: #table aiming for=%d\n",ntbl);
302
for(iter = 1;; iter++)
303
{ /**/DEBUG_PRINT(2,"\t\titer=%d ", iter); DEBUG_PRINT(2,"cmpsz=%d ", (grp->cmpsz+7)/8);
304
305
for(k = 0; k < ntbl; ++k)
306
{ fr = freq[k]; sz = tbl[k].size;
307
if((z = tbl[k].maxs) > 0)
308
{ z += (z <= 4 ? 15 : z <= 8 ? 7 : z <= 16 ? 1 : 0);
309
for(p = 0; p < VCH_SIZE; ++p)
310
{ if(fr[p] != 0)
311
fr[p] = 0; /* clear frequency table */
312
else sz[p] = z; /* 0-freq obj gets dflt length */
313
}
314
}
315
tbl[k].cost = 0;
316
tbl[k].nblks = 0;
317
}
318
319
for(i = 0; i < npts; i += 1)
320
{ ssize_t bestz, bestt;
321
322
/* find the table best matching this part */
323
p = ppos[i]; n = ppos[i+1] - ppos[i];
324
for(bestz = GRP_HUGE, bestt = -1, k = 0; k < ntbl; ++k)
325
{ if(tbl[k].maxs == 0) /* representing a run */
326
z = (n == 1 && obj[p].obj == tbl[k].runb) ? 0 : GRP_HUGE;
327
else /* normal table, tally up the lengths */
328
{ sz = tbl[k].size; GRPSIZE(z, sz, obj+p, n); }
329
if(z < bestz || bestt < 0)
330
{ bestz = z;
331
bestt = k;
332
}
333
}
334
335
work[i] = bestt; /* assignment, then add frequencies */
336
fr = freq[bestt]; GRPFREQ(fr, obj+p, n );
337
tbl[bestt].nblks += 1;
338
}
339
340
/* remove empty tables */
341
for(p = k = 0; k < ntbl; ++k)
342
{ map[k] = k; /* start as identity map */
343
344
if(tbl[k].nblks <= 0) /* empty table */
345
continue;
346
347
if(k > p) /* need to move this table */
348
{ memcpy(tbl+p, tbl+k, sizeof(Table_t));
349
memcpy(freq[p], freq[k], VCH_SIZE*sizeof(ssize_t));
350
map[k] = p; /* table at k was moved to p */
351
}
352
p += 1; /* move beyond known valid slot */
353
}
354
if(p < ntbl) /* tables were moved, reset part indexes */
355
{ for(i = 0; i < npts; ++i)
356
{ /**/DEBUG_ASSERT(work[i] < ntbl);
357
work[i] = map[work[i]];
358
/**/DEBUG_ASSERT(work[i] < p);
359
}
360
ntbl = p;
361
}
362
363
z = 0; /* recompute encoding cost given new grouping */
364
for(k = 0; k < ntbl; ++k)
365
{ fr = freq[k]; sz = tbl[k].size;
366
tbl[k].maxs = vchsize(VCH_SIZE, fr, sz, &tbl[k].runb);
367
if(tbl[k].maxs > 0)
368
{ DOTPRODUCT(p,fr,sz,VCH_SIZE); /* encoding size */
369
n = vchputcode(VCH_SIZE, sz, tbl[k].maxs, tmp, sizeof(tmp));
370
p += (n + vcsizeu(n))*8;
371
tbl[k].cost = p;
372
z += p; /* add to total cost */
373
}
374
else
375
{ /**/DEBUG_ASSERT(tbl[k].runb >= 0);
376
z += (tbl[k].cost = 2*8); /* one 0-byte and the run byte */
377
}
378
}
379
380
if(ntbl > 1) /* add the cost of encoding the indices */
381
{ n = vcapply(grp->mtf,work,npts,&dt); /* mtf transform */
382
CLRTABLE(pfr,VCH_SIZE); ADDFREQ(pfr, Vcchar_t*, dt, n);
383
k = vchsize(VCH_SIZE, pfr, psz, NIL(int*));
384
DOTPRODUCT(p, pfr, psz, VCH_SIZE);
385
n = vchputcode(VCH_SIZE, psz, k, tmp, sizeof(tmp));
386
z += p + (n + vcsizeu(n))*8;
387
}
388
389
/**/DEBUG_PRINT(2,"z=%d\n", (z+7)/8);
390
if(z > grp->hufsz)
391
{ /**/DEBUG_PRINT(2, "Stop iterating: z=%d > huffman size\n", z);
392
grp->ntbl = 0;
393
return;
394
}
395
396
if(z < (p = grp->cmpsz) )
397
{ grp->ntbl = ntbl;
398
grp->cmpsz = z;
399
memcpy(part, work, npts);
400
memcpy(grp->tbl, tbl, ntbl*sizeof(Table_t));
401
}
402
403
if(ntbl == 1 || iter >= niter || (iter > 1 && z >= p-64) )
404
{ /**/DEBUG_PRINT(2,"\t\t#table=%d ",grp->ntbl);
405
/**/DEBUG_PRINT(2,"cmpsz=%d\n", (grp->cmpsz+7)/8);
406
return;
407
}
408
}
409
}
410
411
/* select a part size based on the data size. */
412
static ssize_t grpptsz(size_t dtsz)
413
{
414
ssize_t ptsz, exp, lnz;
415
416
lnz = vclogi(dtsz); /* log_2 of data size */
417
if(lnz >= 16)
418
{ exp = (exp = (lnz-15)) > 8 ? 8 : exp;
419
ptsz = (1 << exp) * lnz; /* ptsz here is >= 32 */
420
}
421
else ptsz = dtsz/(1<<10); /* ptsz here <= 64 */
422
423
ptsz = ptsz < 16 ? 16 : ptsz > 1024 ? 1024 : ptsz;
424
/**/DEBUG_PRINT(2,"grpptsz: dtsz=%d, ", dtsz);
425
/**/DEBUG_PRINT(2,"lnz=%d, ", lnz);
426
/**/DEBUG_PRINT(2,"ptsz=%d\n",ptsz);
427
return ptsz;
428
}
429
430
#if __STD_C
431
static ssize_t grphuff(Vcodex_t* vc, const Void_t* data, size_t dtsz, Void_t** out)
432
#else
433
static ssize_t grphuff(vc, data, dtsz, out)
434
Vcodex_t* vc; /* Vcodex handle */
435
Void_t* data; /* target data to be compressed */
436
size_t dtsz; /* data size */
437
Void_t** out; /* to return output buffer */
438
#endif
439
{
440
ssize_t n, i, p, k;
441
ssize_t *sz, npts, ntbl, ptsz, itbl;
442
ssize_t freq[VCH_SIZE], size[VCH_SIZE];
443
Vcchar_t *part;
444
Table_t *tbl;
445
Vcbit_t b, *bt, bits[GRP_NTBL][VCH_SIZE];
446
Vcchar_t *output, *dt;
447
ssize_t n_output;
448
Vcio_t io;
449
Group_t *grp = vcgetmtdata(vc, Group_t*);
450
451
if(dtsz == 0)
452
return 0;
453
454
/* get the size of doing Huffman alone */
455
CLRTABLE(freq,VCH_SIZE);
456
ADDFREQ(freq, Vchobj_t*, data, dtsz);
457
CLRTABLE(size,VCH_SIZE);
458
vchsize(VCH_SIZE, freq, size, 0);
459
DOTPRODUCT(grp->hufsz, freq, size, VCH_SIZE);
460
grp->hufsz += 256*8; /* plus est. header cost */
461
462
/* set desired part size and bounds for number of groups */
463
ptsz = grpptsz(dtsz);
464
465
/* initialize data structures for fast frequency calculations */
466
if(grpinit(grp, (Vcchar_t*)data, dtsz, ptsz) < 0)
467
return -1;
468
469
/* now make the best grouping */
470
if((itbl = vclogi(grp->npts)) > GRP_IMAX)
471
itbl = GRP_IMAX;
472
else if(itbl < GRP_IMIN)
473
itbl = GRP_IMIN;
474
/**/DEBUG_PRINT(2,"grphuff: dtsz=%d, ", dtsz); DEBUG_PRINT(2,"ptsz=%d, ", ptsz);
475
/**/DEBUG_PRINT(2,"npts=%d, ", grp->npts); DEBUG_PRINT(2,"itbl=%d\n", itbl);
476
grppart(grp, itbl, GRP_ITER);
477
if(grp->ntbl == 0) /* single table, simplify */
478
{ grpinit(grp, (Vcchar_t*)data, dtsz, dtsz);
479
grppart(grp, 1, 1);
480
}
481
482
/* short-hands */
483
part = grp->part; npts = grp->npts; ptsz = grp->ptsz;
484
tbl = grp->tbl; ntbl = grp->ntbl;
485
486
/* get space for output */
487
n_output = (ntbl+1)*(VCH_SIZE+8) + (grp->cmpsz+7)/8;
488
if(!(output = vcbuffer(vc, NIL(Vcchar_t*), n_output, 0)) )
489
return -1;
490
vcioinit(&io, output, n_output);
491
492
vcioputu(&io, dtsz);
493
vcioputu(&io, ntbl);
494
vcioputu(&io, ptsz); /* the part size used */
495
496
/* output the coding tables and compute the coding bits */
497
for(k = 0; k < ntbl; ++k)
498
{ vcioputc(&io, tbl[k].maxs);
499
if(tbl[k].maxs == 0) /* coding a run */
500
vcioputc(&io,tbl[k].runb);
501
else /* coding a code table */
502
{ dt = vcionext(&io); n = vciomore(&io);
503
if((n = vchputcode(VCH_SIZE, tbl[k].size, tbl[k].maxs, dt, n)) < 0)
504
return -1;
505
else vcioskip(&io, n);
506
vchbits(VCH_SIZE, tbl[k].size, bits[k]);
507
}
508
}
509
510
/* compress and output the indices */
511
if((n = vcapply(grp->mtf, part, npts, &dt)) < 0 )
512
return -1;
513
if((n = vcapply(grp->huf, dt, n, &dt)) < 0 )
514
return -1;
515
vcioputu(&io,n);
516
vcioputs(&io,dt,n);
517
vcbuffer(grp->mtf, NIL(Vcchar_t*), -1, -1);
518
vcbuffer(grp->huf, NIL(Vcchar_t*), -1, -1);
519
520
/* now write out the encoded data */
521
vciosetb(&io, b, n, VC_ENCODE);
522
for(p = 0, i = 0; i < npts; i += 1, p += ptsz)
523
{ if(tbl[part[i]].maxs == 0)
524
continue;
525
526
sz = tbl[part[i]].size;
527
bt = bits[part[i]];
528
dt = ((Vcchar_t*)data)+p;
529
for(k = i == npts-1 ? dtsz-p : ptsz; k > 0; --k, ++dt)
530
vcioaddb(&io, b, n, bt[*dt], sz[*dt]);
531
}
532
vcioendb(&io, b, n, VC_ENCODE);
533
534
dt = output;
535
n = vciosize(&io); /**/ DEBUG_ASSERT(n <= n_output);
536
if(vcrecode(vc, &output, &n, 0, 0) < 0 )
537
return -1;
538
if(dt != output)
539
vcbuffer(vc, dt, -1, -1);
540
541
if(out)
542
*out = output;
543
return n;
544
}
545
546
#if __STD_C
547
static ssize_t grpunhuff(Vcodex_t* vc, const Void_t* orig, size_t dtsz, Void_t** out)
548
#else
549
static ssize_t grpunhuff(vc, orig, dtsz, out)
550
Vcodex_t* vc; /* Vcodex handle */
551
Void_t* orig; /* data to be uncompressed */
552
size_t dtsz; /* data size */
553
Void_t** out; /* to return output buffer */
554
#endif
555
{
556
reg Vcbit_t b;
557
ssize_t k, p, sz, n, ntop;
558
short *node, *size;
559
ssize_t npts, ptsz, ntbl;
560
Vcchar_t *part, *dt, *output, *o, *endo, *data;
561
Table_t tbl[GRP_NTBL];
562
Vcbit_t bits[GRP_NTBL][VCH_SIZE];
563
Vchtrie_t *trie[GRP_NTBL];
564
Vcio_t io;
565
Group_t *grp = vcgetmtdata(vc, Group_t*);
566
int rv = -1;
567
568
if(dtsz == 0)
569
return 0;
570
571
data = (Vcchar_t*)orig; sz = (ssize_t)dtsz;
572
if(vcrecode(vc, &data, &sz, 0, 0) < 0 )
573
return -1;
574
dtsz = sz;
575
576
for(k = 0; k < GRP_NTBL; ++k)
577
trie[k] = NIL(Vchtrie_t*);
578
579
vcioinit(&io,data,dtsz);
580
581
if((sz = (ssize_t)vciogetu(&io)) < 0) /* size of decoded data */
582
goto done;
583
if((ntbl = (ssize_t)vciogetu(&io)) < 0) /* # of coding tables */
584
goto done;
585
if((ptsz = (ssize_t)vciogetu(&io)) < 0) /* size of each part */
586
goto done;
587
588
for(k = 0; k < ntbl; ++k)
589
{ if((tbl[k].maxs = vciogetc(&io)) < 0) /* max code size */
590
goto done;
591
else if(tbl[k].maxs == 0) /* this is a run */
592
tbl[k].runb = vciogetc(&io);
593
else /* construct code table and trie for fast matching */
594
{ dt = vcionext(&io); n = vciomore(&io);
595
if((n = vchgetcode(VCH_SIZE, tbl[k].size, tbl[k].maxs, dt, n)) < 0)
596
goto done;
597
else vcioskip(&io,n);
598
599
vchbits(VCH_SIZE, tbl[k].size, bits[k]);
600
if(!(trie[k] = vchbldtrie(VCH_SIZE, tbl[k].size, bits[k])) )
601
goto done;
602
}
603
}
604
605
/* reconstruct the array of part indices */
606
if((n = vciogetu(&io)) < 0)
607
goto done;
608
dt = vcionext(&io); vcioskip(&io,n);
609
if((n = vcapply(grp->huf, dt, n, &dt)) < 0)
610
goto done;
611
if((npts = vcapply(grp->mtf, dt, n, &part)) < 0)
612
goto done;
613
614
/* buffer to reconstruct the original data */
615
if(!(output = vcbuffer(vc, (Vcchar_t*)0, sz, 0)) )
616
goto done;
617
endo = (o = output) + sz;
618
619
vciosetb(&io, b, n, VC_DECODE);
620
for(k = 0; k < npts; ++k)
621
{ dt = o + (k == npts-1 ? endo-o : ptsz); /* end of this part */
622
if(tbl[part[k]].maxs == 0) /* reconstruct a run */
623
{ p = tbl[part[k]].runb;
624
while(o < dt)
625
*o++ = (Vcchar_t)p;
626
}
627
else /* reconstructing a Huffman-coded set of bytes */
628
{ node = trie[part[k]]->node;
629
size = trie[part[k]]->size;
630
ntop = trie[part[k]]->ntop;
631
for(sz = ntop, p = 0;;)
632
{ vciofilb(&io, b, n, sz);
633
634
p += (b >> (VC_BITSIZE-sz));
635
if(size[p] > 0)
636
{ vciodelb(&io, b, n, size[p]);
637
*o = (Vcchar_t)node[p];
638
if((o += 1) >= dt)
639
break;
640
sz = ntop; p = 0;
641
}
642
else if(size[p] == 0)
643
return -1;
644
else
645
{ vciodelb(&io, b, n, sz);
646
sz = -size[p]; p = node[p];
647
}
648
}
649
}
650
} /**/DEBUG_ASSERT(o == endo);
651
vcioendb(&io, b, n, VC_DECODE);
652
653
if(out)
654
*out = output;
655
rv = o-output;
656
657
done: for(k = 0; k < GRP_NTBL; ++k)
658
if(trie[k])
659
vchdeltrie(trie[k]);
660
661
vcbuffer(grp->huf, NIL(Vcchar_t*), -1, -1);
662
vcbuffer(grp->mtf, NIL(Vcchar_t*), -1, -1);
663
664
if(data != orig)
665
vcbuffer(vc, data, -1, -1);
666
667
return rv;
668
}
669
670
/* Event handler */
671
#if __STD_C
672
static int grpevent(Vcodex_t* vc, int type, Void_t* params)
673
#else
674
static int grpevent(vc, type, params)
675
Vcodex_t* vc;
676
int type;
677
Void_t* params;
678
#endif
679
{
680
Group_t *grp;
681
int rv = -1;
682
683
if(type == VC_OPENING)
684
{ if(!(grp = (Group_t*)calloc(1, sizeof(Group_t))) )
685
return -1;
686
687
/* open the entropy coder handle */
688
grp->huf = vcopen(NIL(Vcdisc_t*), Vchuffman, 0, 0, vc->flags);
689
grp->mtf = vcopen(NIL(Vcdisc_t*), Vcmtf, 0, 0, vc->flags);
690
if(!grp->huf || !grp->mtf )
691
goto do_closing;
692
693
vcsetmtdata(vc, grp);
694
return 0;
695
}
696
else if(type == VC_CLOSING)
697
{ rv = 0;
698
do_closing:
699
if((grp = vcgetmtdata(vc, Group_t*)) )
700
{ if(grp->huf)
701
vcclose(grp->huf);
702
if(grp->mtf)
703
vcclose(grp->mtf);
704
if(grp->part)
705
free(grp->part);
706
if(grp->ppos)
707
free(grp->ppos);
708
if(grp->obj)
709
free(grp->obj);
710
free(grp);
711
}
712
713
vcsetmtdata(vc, NIL(Group_t*));
714
return rv;
715
}
716
else if(type == VC_FREEBUFFER)
717
{ if((grp = vcgetmtdata(vc, Group_t*)) )
718
{ if(grp->mtf)
719
vcbuffer(grp->mtf, NIL(Vcchar_t*), -1, -1);
720
if(grp->huf)
721
vcbuffer(grp->huf, NIL(Vcchar_t*), -1, -1);
722
}
723
return 0;
724
}
725
else return 0;
726
}
727
728
Vcmethod_t _Vchuffgroup =
729
{ grphuff,
730
grpunhuff,
731
grpevent,
732
"huffgroup", "Huffman encoding by groups",
733
"[-version?huffgroup (AT&T Research) 2003-01-01]" USAGE_LICENSE,
734
0,
735
1024*1024,
736
0
737
};
738
739
VCLIB(Vchuffgroup)
740
741