Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libvcodex/Vcmisc/vcrle.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2003-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* *
19
***********************************************************************/
20
#include <vclib.h>
21
22
/* Various run-length-encoding methods.
23
**
24
** Written by Kiem-Phong Vo ([email protected])
25
*/
26
27
typedef struct _rle_s Rle_t;
28
typedef ssize_t (*Rle_f)_ARG_((Rle_t*, int));
29
static ssize_t rle0 _ARG_((Rle_t*, int));
30
static ssize_t rle1 _ARG_((Rle_t*, int));
31
static ssize_t rle2 _ARG_((Rle_t*, int));
32
static ssize_t rleg _ARG_((Rle_t*, int));
33
34
struct _rle_s
35
{
36
Rle_f rlef; /* rle function to call */
37
Vcchar_t* ibuf; /* default input data */
38
ssize_t isiz;
39
40
Vcchar_t* obuf; /* default output data */
41
ssize_t osiz;
42
Vcchar_t* endo; /* upper bound of array */
43
44
Vcchar_t* abuf; /* auxiliary buffer */
45
ssize_t asiz;
46
47
Vcchar_t run1; /* run-heads of rle2() */
48
Vcchar_t run2;
49
};
50
51
/* arguments to select type of run length coder */
52
static Vcmtarg_t _Rleargs[] =
53
{ { "0", "Run-length-encoding: 0-sequences only.", (Void_t*)rle0 },
54
{ "1", "Run-length-encoding: 0&1-sequences only.", (Void_t*)rle1 },
55
{ "2", "Run-length-encoding: Alphabet has only two letters.", (Void_t*)rle2 },
56
{ 0 , "General run-length-encoding.", (Void_t*)rleg }
57
};
58
59
/* Encoding 0-run's only. Useful for sequences undergone entropy reduction
60
transforms such as Burrows-Wheele + MTF or the column prediction method.
61
*/
62
#if __STD_C
63
static ssize_t rle0(Rle_t* rle, int encoding)
64
#else
65
static ssize_t rle0(rle, encoding)
66
Rle_t* rle;
67
int encoding;
68
#endif
69
{
70
ssize_t c, z;
71
Vcchar_t *dt, *enddt, *o, *endo;
72
Vcio_t io;
73
74
enddt = (dt = rle->ibuf) + rle->isiz;
75
o = rle->obuf; rle->osiz = 0;
76
77
if(encoding)
78
{ for(z = -1; dt < enddt; ++dt)
79
{ if((c = *dt) == 0)
80
z += 1;
81
else
82
{ if(z >= 0) /* encode the 0-run */
83
{ /**/DEBUG_PRINT(9,"%d\n",z);
84
vcioinit(&io, o, 8*sizeof(ssize_t));
85
vcioput2(&io, z, 0, RL_ZERO);
86
o = vcionext(&io);
87
z = -1;
88
}
89
if(c >= RL_ZERO) /* literal encoding */
90
*o++ = RL_ESC;
91
*o++ = c;
92
}
93
}
94
if(z >= 0) /* final 0-run if any */
95
{ /**/DEBUG_PRINT(9,"%d\n",z);
96
vcioinit(&io, o, 8*sizeof(ssize_t));
97
vcioput2(&io, z, 0, RL_ZERO);
98
o = vcionext(&io);
99
}
100
}
101
else
102
{ for(endo = rle->endo; dt < enddt; )
103
{ if((c = *dt++) == RL_ESC)
104
{ if(dt >= enddt) /* premature EOF */
105
return -1;
106
c = *dt++;
107
if(o >= endo || c < RL_ZERO) /* corrupted data */
108
return -1;
109
*o++ = c;
110
}
111
else if(c == 0 || c == RL_ZERO)
112
{ vcioinit(&io, dt-1, (enddt-dt)+1);
113
z = vcioget2(&io, 0, RL_ZERO); /**/DEBUG_PRINT(9,"%d\n",z);
114
dt = vcionext(&io);
115
if(o+z > endo)
116
return -1;
117
for(; z >= 0; --z)
118
*o++ = 0;
119
}
120
else
121
{ if(o >= endo)
122
return -1;
123
*o++ = c;
124
}
125
}
126
}
127
128
return (rle->osiz = o - rle->obuf);
129
}
130
131
/* Like rle0() but including 1-run's as well as 0-runs */
132
#if __STD_C
133
static ssize_t rle1(Rle_t* rle, int encoding)
134
#else
135
static ssize_t rle1(rle, encoding)
136
Rle_t* rle;
137
int encoding;
138
#endif
139
{
140
ssize_t c, rc, rz;
141
Vcchar_t *dt, *enddt, *o, *endo;
142
Vcio_t io;
143
144
enddt = (dt = rle->ibuf) + rle->isiz;
145
o = rle->obuf; rle->osiz = 0;
146
147
if(encoding)
148
{ for(rc = rz = -1; dt < enddt; ++dt)
149
{ if((c = *dt) == 0 || c == 1)
150
{ if(c == rc)
151
rz += 1;
152
else
153
{ if(rz >= 0)
154
{ vcioinit(&io, o, 8*sizeof(ssize_t));
155
if(rc == 0)
156
vcioput2(&io, rz, 0, RL_ZERO);
157
else vcioput2(&io, rz, 1, RL_ONE);
158
o = vcionext(&io);
159
}
160
161
rc = c; rz = 0;
162
}
163
}
164
else
165
{ if(rz >= 0) /* encode the 0/1-run */
166
{ vcioinit(&io, o, 8*sizeof(ssize_t));
167
if(rc == 0)
168
vcioput2(&io, rz, 0, RL_ZERO);
169
else vcioput2(&io, rz, 1, RL_ONE);
170
o = vcionext(&io);
171
172
rc = rz = -1;
173
}
174
175
if(c >= RL_ONE) /* literal encoding */
176
*o++ = RL_ESC;
177
*o++ = c;
178
}
179
}
180
if(rz >= 0) /* final 0/1-run if any */
181
{ vcioinit(&io, o, 8*sizeof(ssize_t));
182
if(rc == 0)
183
vcioput2(&io, rz, 0, RL_ZERO);
184
else vcioput2(&io, rz, 1, RL_ONE);
185
o = vcionext(&io);
186
}
187
}
188
else
189
{ for(endo = rle->endo; dt < enddt; )
190
{ if((c = *dt++) == RL_ESC)
191
{ if(dt >= enddt) /* premature EOF */
192
return -1;
193
c = *dt++;
194
if(o >= endo || c < RL_ONE) /* corrupted data */
195
return -1;
196
*o++ = c;
197
}
198
else if(c == 0 || c == RL_ZERO || c == 1 || c == RL_ONE)
199
{ vcioinit(&io, dt-1, (enddt-dt)+1);
200
if(c == 0 || c == RL_ZERO)
201
rz = vcioget2(&io, 0, RL_ZERO);
202
else rz = vcioget2(&io, 1, RL_ONE);
203
dt = vcionext(&io);
204
if(o+rz > endo)
205
return -1;
206
if(c == 0 || c == RL_ZERO)
207
{ for(; rz >= 0; --rz)
208
*o++ = 0;
209
}
210
else
211
{ for(; rz >= 0; --rz)
212
*o++ = 1;
213
}
214
}
215
else
216
{ if(o >= endo)
217
return -1;
218
*o++ = c;
219
}
220
}
221
}
222
223
return (rle->osiz = o - rle->obuf);
224
}
225
226
/* Encoding a sequence with at most two distinct letters */
227
#if __STD_C
228
static ssize_t rle2(Rle_t* rle, int encoding)
229
#else
230
static ssize_t rle2(rle, encoding)
231
Rle_t* rle;
232
int encoding;
233
#endif
234
{
235
Vcchar_t *dt, *enddt;
236
ssize_t sz, n, k, c, c1, c2;
237
Vcio_t io;
238
239
if(encoding)
240
{ dt = rle->ibuf; sz = rle->isiz; /**/DEBUG_ASSERT(sz >= 1);
241
vcioinit(&io, rle->obuf, rle->osiz);
242
243
for(c1 = dt[0], n = 1; n < sz; ++n)
244
if(dt[n] != c1)
245
break;
246
if(n == sz)
247
c2 = c1; /* just a single run */
248
else
249
{ c2 = dt[n];
250
for(c = c1, k = n; k < sz; ++k)
251
{ if(dt[k] == c) /* current run continues */
252
n += 1;
253
else /* end of current run */
254
{ if((c = dt[k]) != c1 && c != c2)
255
RETURN(-1);
256
vcioputu(&io, n-1);
257
n = 1;
258
}
259
}
260
}
261
vcioputu(&io, n-1); /* output length of last run */
262
263
rle->run1 = c1; rle->run2 = c2;
264
return rle->osiz = vciosize(&io);
265
}
266
else
267
{ dt = rle->obuf; enddt = rle->endo;
268
c1 = rle->run1; c2 = rle->run2;
269
vcioinit(&io, rle->ibuf, rle->isiz);
270
for(c = c1; vciomore(&io) > 0;)
271
{ for(n = vciogetu(&io); n >= 0 && dt < enddt; --n, ++dt)
272
*dt = c;
273
c = c == c1 ? c2 : c1; /* alternate run letters */
274
}
275
if(vciomore(&io) != 0)
276
RETURN(-1);
277
278
return (rle->osiz = dt - rle->obuf);
279
}
280
}
281
282
/* General rl-encoding using escape codes */
283
#if __STD_C
284
static ssize_t rleg(Rle_t* rle, int encoding)
285
#else
286
static ssize_t rleg(rle, encoding)
287
Rle_t* rle;
288
int encoding;
289
#endif
290
{
291
Vcchar_t c, *chr, *run, *dt, *enddt, *endb, *nextb;
292
ssize_t r;
293
Vcio_t io;
294
295
if(encoding)
296
{ endb = (dt = rle->ibuf) + rle->isiz;
297
298
/* set buffers for runs and data */
299
chr = rle->obuf;
300
run = rle->abuf;
301
for(; dt < endb; dt = nextb)
302
{ for(c = *dt, nextb = dt+1; nextb < endb; ++nextb)
303
if(*nextb != c)
304
break;
305
if((r = nextb-dt) >= 3 || c == RL_ESC)
306
{ /* in-line small cases here for speed */
307
if(r < (1<<7))
308
*run++ = r;
309
else if(r < (1<<14))
310
{ *run++ = (r>>7)|128;
311
*run++ = r&127;
312
}
313
else
314
{ vcioinit(&io, run, 2*sizeof(ssize_t));
315
vcioputu(&io, r);
316
run = vcionext(&io);
317
}
318
319
*chr++ = RL_ESC;
320
if(c != RL_ESC || r > 1)
321
*chr++ = c;
322
}
323
else
324
{ *chr++ = c;
325
if(r == 2)
326
*chr++ = c;
327
}
328
}
329
330
return (rle->osiz = chr - rle->obuf) + (rle->asiz = run - rle->abuf);
331
}
332
else
333
{ dt = rle->obuf; enddt = rle->endo;
334
vcioinit(&io, rle->abuf, rle->asiz);
335
for(endb = (chr = rle->ibuf) + rle->isiz; chr < endb; )
336
{ if((c = *chr++) != RL_ESC)
337
{ if(dt >= enddt)
338
return -1;
339
*dt++ = c;
340
}
341
else
342
{ r = vciogetu(&io);
343
if(dt+r > enddt)
344
return -1;
345
if(r == 1)
346
*dt++ = RL_ESC;
347
else for(c = *chr++; r > 0; --r)
348
*dt++ = c;
349
}
350
}
351
352
return (rle->osiz = dt - rle->obuf);
353
}
354
}
355
356
357
#if __STD_C
358
static ssize_t vcrle(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
359
#else
360
static ssize_t vcrle(vc, data, size, out)
361
Vcodex_t* vc;
362
Void_t* data;
363
size_t size;
364
Void_t** out;
365
#endif
366
{
367
Vcchar_t *dt, *output, *space;
368
ssize_t k, hd, sz, outsz;
369
Vcio_t io;
370
Rle_t *rle = vcgetmtdata(vc, Rle_t*);
371
372
if(size == 0)
373
return 0;
374
375
/* input data to encode */
376
rle->ibuf = (Vcchar_t*)data;
377
rle->isiz = (ssize_t)size;
378
379
/* size of header in transformed data */
380
hd = vcsizeu(size);
381
if(rle->rlef == rle2)
382
hd += 2; /* for the two run bytes */
383
384
/* allocate buffers for transformed data */
385
if(rle->rlef == rleg) /* size for data + size/2 for lengths */
386
outsz = 2*vcsizeu(size) + size + size/2;
387
else if(rle->rlef == rle2)
388
outsz = size; /* binary coding never exceeds size */
389
else if(size < 64*1024*1024) /* if get here: rle0/1 coding */
390
outsz = 2*size; /* small, just overallocate */
391
else /* count the extra RL_ESC's that might be needed */
392
{ for(dt = rle->ibuf, sz = 0, k = 0; k < size; ++k)
393
if(dt[k] >= RL_ONE)
394
sz += 1;
395
outsz = size + sz;
396
}
397
if(!(output = space = vcbuffer(vc, NIL(Vcchar_t*), outsz+128, hd)) )
398
RETURN(-1);
399
400
if(rle->rlef == rleg)
401
{ rle->obuf = output + vcsizeu(size);
402
rle->endo = rle->obuf + (rle->osiz = size);
403
rle->abuf = rle->endo + vcsizeu(size);
404
rle->asiz = size/2;
405
406
if((sz = (*rle->rlef)(rle, 1)) < 0 )
407
RETURN(-1);
408
409
if(vc->coder) /* run continuator on the two parts */
410
{ if(vcrecode(vc, &rle->obuf, &rle->osiz, 0, 0) < 0)
411
RETURN(-1);
412
if(vcrecode(vc, &rle->abuf, &rle->asiz, 0, 0) < 0)
413
RETURN(-1);
414
}
415
416
sz = vcsizeu(rle->osiz) + rle->osiz + vcsizeu(rle->asiz) + rle->asiz;
417
if(sz > outsz)
418
output = vcbuffer(vc, NIL(Vcchar_t*), sz, hd);
419
420
vcioinit(&io, output, sz);
421
422
vcioputu(&io, rle->osiz);
423
if(rle->obuf != vcionext(&io))
424
vcioputs(&io, rle->obuf, rle->osiz);
425
else vcioskip(&io, rle->osiz);
426
427
vcioputu(&io, rle->asiz);
428
if(rle->abuf != vcionext(&io))
429
vcioputs(&io, rle->abuf, rle->asiz);
430
else vcioskip(&io, rle->asiz);
431
432
sz = vciosize(&io);
433
}
434
else
435
{ rle->obuf = output;
436
rle->endo = rle->obuf + outsz;
437
438
if((sz = (*rle->rlef)(rle, 1)) < 0 )
439
RETURN(-1);
440
441
if(vcrecode(vc, &output, &sz, hd, 0) < 0 )
442
RETURN(-1);
443
}
444
445
if(space != output) /* free space if unused */
446
vcbuffer(vc, space, -1, -1);
447
448
output -= hd; sz += hd;
449
vcioinit(&io, output, hd);
450
vcioputu(&io, size);
451
452
if(rle->rlef == rle2)
453
{ vcioputc(&io, rle->run1);
454
vcioputc(&io, rle->run2);
455
}
456
457
if(!(output = vcbuffer(vc, output, sz, -1)) ) /* truncate buffer to size */
458
RETURN(-1);
459
if(out)
460
*out = output;
461
return sz;
462
}
463
464
#if __STD_C
465
static ssize_t vcunrle(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
466
#else
467
static ssize_t vcunrle(vc, data, size, out)
468
Vcodex_t* vc;
469
Void_t* data;
470
size_t size;
471
Void_t** out;
472
#endif
473
{
474
Vcchar_t *output;
475
ssize_t sz;
476
Vcio_t io;
477
Rle_t *rle = vcgetmtdata(vc, Rle_t*);
478
479
if(size == 0)
480
return 0;
481
482
rle->ibuf = rle->abuf = NIL(Vcchar_t*);
483
rle->isiz = rle->asiz = 0;
484
485
vcioinit(&io, data, size);
486
if((sz = vciogetu(&io)) <= 0 )
487
RETURN(-1);
488
489
if(rle->rlef == rle2) /* get the two run bytes */
490
{ if(vciomore(&io) < 2)
491
RETURN(-1);
492
rle->run1 = vciogetc(&io);
493
rle->run2 = vciogetc(&io);
494
}
495
496
if(!(output = vcbuffer(vc, (Vcchar_t*)0, sz, 0)) )
497
RETURN(-1);
498
rle->obuf = output;
499
rle->endo = rle->obuf + sz;
500
501
if(rle->rlef == rleg)
502
{ if(vciomore(&io) <= 0 || (rle->isiz = vciogetu(&io)) < 0)
503
RETURN(-1);
504
if(vciomore(&io) < rle->isiz)
505
RETURN(-1);
506
rle->ibuf = vcionext(&io);
507
vcioskip(&io, rle->isiz);
508
509
if(vciomore(&io) <= 0 || (rle->asiz = vciogetu(&io)) < 0)
510
RETURN(-1);
511
if(vciomore(&io) < rle->asiz)
512
RETURN(-1);
513
rle->abuf = vcionext(&io);
514
515
/* decode data by secondary encoders */
516
if(vcrecode(vc, &rle->ibuf, &rle->isiz, 0, 0) < 0)
517
RETURN(-1);
518
if(vcrecode(vc, &rle->abuf, &rle->asiz, 0, 0) < 0)
519
RETURN(-1);
520
}
521
else
522
{ if(vciomore(&io) <= 0 || (rle->isiz = vciomore(&io)) < 0 )
523
RETURN(-1);
524
rle->ibuf = vcionext(&io);
525
if(vcrecode(vc, &rle->ibuf, &rle->isiz, 0, 0) < 0)
526
RETURN(-1);
527
}
528
529
if((*rle->rlef)(rle, 0) != sz) /* decode run data */
530
RETURN(-1);
531
532
/* free temporary data */
533
if(rle->ibuf && (rle->ibuf < (Vcchar_t*)data || rle->ibuf >= (Vcchar_t*)data+size) )
534
vcbuffer(vc, rle->ibuf, -1, -1);
535
if(rle->abuf && (rle->abuf < (Vcchar_t*)data || rle->abuf >= (Vcchar_t*)data+size) )
536
vcbuffer(vc, rle->abuf, -1, -1);
537
538
if(out)
539
*out = output;
540
return sz;
541
}
542
543
#if __STD_C
544
static ssize_t rleextract(Vcodex_t* vc, Vcchar_t** datap)
545
#else
546
static ssize_t rleextract(vc, datap)
547
Vcodex_t* vc;
548
Vcchar_t** datap; /* basis string for persistence */
549
#endif
550
{
551
Vcmtarg_t *arg;
552
char *ident;
553
ssize_t n;
554
Rle_t *rle = vcgetmtdata(vc, Rle_t*);
555
556
for(arg = _Rleargs;; ++arg)
557
if(!arg->name || arg->data == (Void_t*)rle->rlef)
558
break;
559
if(!arg->name)
560
return 0;
561
562
n = strlen(arg->name);
563
if(!(ident = (char*)vcbuffer(vc, NIL(Vcchar_t*), sizeof(int)*n+1, 0)) )
564
RETURN(-1);
565
if(!(ident = vcstrcode(arg->name, ident, sizeof(int)*n+1)) )
566
RETURN(-1);
567
if(datap)
568
*datap = (Void_t*)ident;
569
return n;
570
}
571
572
#if __STD_C
573
static Vcodex_t* rlerestore(Vcchar_t* data, ssize_t dtsz)
574
#else
575
static Vcodex_t* rlerestore(data, dtsz)
576
Vcchar_t* data; /* persistence data */
577
ssize_t dtsz;
578
#endif
579
{
580
Vcmtarg_t *arg;
581
char *ident, buf[1024];
582
583
for(arg = _Rleargs; arg->name; ++arg)
584
{ if(!(ident = vcstrcode(arg->name, buf, sizeof(buf))) )
585
return NIL(Vcodex_t*);
586
if(dtsz == strlen(ident) && strncmp(ident, (Void_t*)data, dtsz) == 0)
587
break;
588
}
589
return vcopen(0, Vcrle, (Void_t*)arg->name, 0, VC_DECODE);
590
}
591
592
#if __STD_C
593
static int rleevent(Vcodex_t* vc, int type, Void_t* params)
594
#else
595
static int rleevent(vc, type, params)
596
Vcodex_t* vc;
597
int type;
598
Void_t* params;
599
#endif
600
{
601
Rle_t *rle;
602
Vcmtarg_t *arg;
603
Vcmtcode_t *mtcd;
604
char *data;
605
606
if(type == VC_OPENING )
607
{ for(arg = NIL(Vcmtarg_t*), data = (char*)params; data && *data; )
608
{ data = vcgetmtarg(data, 0, 0, _Rleargs, &arg);
609
if(arg && arg->name)
610
break;
611
}
612
if(!arg)
613
for(arg = _Rleargs;; ++arg)
614
if(!arg->name)
615
break;
616
617
if(!(rle = (Rle_t*)calloc(1,sizeof(Rle_t))) )
618
RETURN(-1);
619
rle->rlef = (Rle_f)arg->data;
620
621
vcsetmtdata(vc, rle);
622
return 0;
623
}
624
else if(type == VC_CLOSING)
625
{ if((rle = vcgetmtdata(vc, Rle_t*)) )
626
free(rle);
627
return 0;
628
}
629
else if(type == VC_EXTRACT)
630
{ if(!(mtcd = (Vcmtcode_t*)params) )
631
RETURN(-1);
632
if((mtcd->size = rleextract(vc, &mtcd->data)) < 0 )
633
RETURN(-1);
634
return 1;
635
}
636
else if(type == VC_RESTORE)
637
{ if(!(mtcd = (Vcmtcode_t*)params) )
638
RETURN(-1);
639
if(!(mtcd->coder = rlerestore(mtcd->data, mtcd->size)) )
640
RETURN(-1);
641
return 1;
642
}
643
644
return 0;
645
}
646
647
Vcmethod_t _Vcrle =
648
{ vcrle,
649
vcunrle,
650
rleevent,
651
"rle", "Run-length encoding.",
652
"[-version?rle (AT&T Research) 2003-01-01]" USAGE_LICENSE,
653
_Rleargs,
654
1024*1024,
655
0
656
};
657
658
VCLIB(Vcrle)
659
660