Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/842/842_compress.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* 842 Software Compression
4
*
5
* Copyright (C) 2015 Dan Streetman, IBM Corp
6
*
7
* See 842.h for details of the 842 compressed format.
8
*/
9
10
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
#define MODULE_NAME "842_compress"
12
13
#include <linux/hashtable.h>
14
15
#include "842.h"
16
#include "842_debugfs.h"
17
18
#define SW842_HASHTABLE8_BITS (10)
19
#define SW842_HASHTABLE4_BITS (11)
20
#define SW842_HASHTABLE2_BITS (10)
21
22
/* By default, we allow compressing input buffers of any length, but we must
23
* use the non-standard "short data" template so the decompressor can correctly
24
* reproduce the uncompressed data buffer at the right length. However the
25
* hardware 842 compressor will not recognize the "short data" template, and
26
* will fail to decompress any compressed buffer containing it (I have no idea
27
* why anyone would want to use software to compress and hardware to decompress
28
* but that's beside the point). This parameter forces the compression
29
* function to simply reject any input buffer that isn't a multiple of 8 bytes
30
* long, instead of using the "short data" template, so that all compressed
31
* buffers produced by this function will be decompressable by the 842 hardware
32
* decompressor. Unless you have a specific need for that, leave this disabled
33
* so that any length buffer can be compressed.
34
*/
35
static bool sw842_strict;
36
module_param_named(strict, sw842_strict, bool, 0644);
37
38
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
39
{ I8, N0, N0, N0, 0x19 }, /* 8 */
40
{ I4, I4, N0, N0, 0x18 }, /* 18 */
41
{ I4, I2, I2, N0, 0x17 }, /* 25 */
42
{ I2, I2, I4, N0, 0x13 }, /* 25 */
43
{ I2, I2, I2, I2, 0x12 }, /* 32 */
44
{ I4, I2, D2, N0, 0x16 }, /* 33 */
45
{ I4, D2, I2, N0, 0x15 }, /* 33 */
46
{ I2, D2, I4, N0, 0x0e }, /* 33 */
47
{ D2, I2, I4, N0, 0x09 }, /* 33 */
48
{ I2, I2, I2, D2, 0x11 }, /* 40 */
49
{ I2, I2, D2, I2, 0x10 }, /* 40 */
50
{ I2, D2, I2, I2, 0x0d }, /* 40 */
51
{ D2, I2, I2, I2, 0x08 }, /* 40 */
52
{ I4, D4, N0, N0, 0x14 }, /* 41 */
53
{ D4, I4, N0, N0, 0x04 }, /* 41 */
54
{ I2, I2, D4, N0, 0x0f }, /* 48 */
55
{ I2, D2, I2, D2, 0x0c }, /* 48 */
56
{ I2, D4, I2, N0, 0x0b }, /* 48 */
57
{ D2, I2, I2, D2, 0x07 }, /* 48 */
58
{ D2, I2, D2, I2, 0x06 }, /* 48 */
59
{ D4, I2, I2, N0, 0x03 }, /* 48 */
60
{ I2, D2, D4, N0, 0x0a }, /* 56 */
61
{ D2, I2, D4, N0, 0x05 }, /* 56 */
62
{ D4, I2, D2, N0, 0x02 }, /* 56 */
63
{ D4, D2, I2, N0, 0x01 }, /* 56 */
64
{ D8, N0, N0, N0, 0x00 }, /* 64 */
65
};
66
67
struct sw842_hlist_node8 {
68
struct hlist_node node;
69
u64 data;
70
u8 index;
71
};
72
73
struct sw842_hlist_node4 {
74
struct hlist_node node;
75
u32 data;
76
u16 index;
77
};
78
79
struct sw842_hlist_node2 {
80
struct hlist_node node;
81
u16 data;
82
u8 index;
83
};
84
85
#define INDEX_NOT_FOUND (-1)
86
#define INDEX_NOT_CHECKED (-2)
87
88
struct sw842_param {
89
u8 *in;
90
u8 *instart;
91
u64 ilen;
92
u8 *out;
93
u64 olen;
94
u8 bit;
95
u64 data8[1];
96
u32 data4[2];
97
u16 data2[4];
98
int index8[1];
99
int index4[2];
100
int index2[4];
101
DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
102
DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
103
DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
104
struct sw842_hlist_node8 node8[1 << I8_BITS];
105
struct sw842_hlist_node4 node4[1 << I4_BITS];
106
struct sw842_hlist_node2 node2[1 << I2_BITS];
107
};
108
109
#define get_input_data(p, o, b) \
110
be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
111
112
#define init_hashtable_nodes(p, b) do { \
113
int _i; \
114
hash_init((p)->htable##b); \
115
for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \
116
(p)->node##b[_i].index = _i; \
117
(p)->node##b[_i].data = 0; \
118
INIT_HLIST_NODE(&(p)->node##b[_i].node); \
119
} \
120
} while (0)
121
122
#define find_index(p, b, n) ({ \
123
struct sw842_hlist_node##b *_n; \
124
p->index##b[n] = INDEX_NOT_FOUND; \
125
hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
126
if (p->data##b[n] == _n->data) { \
127
p->index##b[n] = _n->index; \
128
break; \
129
} \
130
} \
131
p->index##b[n] >= 0; \
132
})
133
134
#define check_index(p, b, n) \
135
((p)->index##b[n] == INDEX_NOT_CHECKED \
136
? find_index(p, b, n) \
137
: (p)->index##b[n] >= 0)
138
139
#define replace_hash(p, b, i, d) do { \
140
struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \
141
hash_del(&_n->node); \
142
_n->data = (p)->data##b[d]; \
143
pr_debug("add hash index%x %x pos %x data %lx\n", b, \
144
(unsigned int)_n->index, \
145
(unsigned int)((p)->in - (p)->instart), \
146
(unsigned long)_n->data); \
147
hash_add((p)->htable##b, &_n->node, _n->data); \
148
} while (0)
149
150
static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
151
152
static int add_bits(struct sw842_param *p, u64 d, u8 n);
153
154
static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
155
{
156
int ret;
157
158
if (n <= s)
159
return -EINVAL;
160
161
ret = add_bits(p, d >> s, n - s);
162
if (ret)
163
return ret;
164
return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
165
}
166
167
static int add_bits(struct sw842_param *p, u64 d, u8 n)
168
{
169
int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
170
u64 o;
171
u8 *out = p->out;
172
173
pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
174
175
if (n > 64)
176
return -EINVAL;
177
178
/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
179
* or if we're at the end of the output buffer and would write past end
180
*/
181
if (bits > 64)
182
return __split_add_bits(p, d, n, 32);
183
else if (p->olen < 8 && bits > 32 && bits <= 56)
184
return __split_add_bits(p, d, n, 16);
185
else if (p->olen < 4 && bits > 16 && bits <= 24)
186
return __split_add_bits(p, d, n, 8);
187
188
if (DIV_ROUND_UP(bits, 8) > p->olen)
189
return -ENOSPC;
190
191
o = *out & bmask[b];
192
d <<= s;
193
194
if (bits <= 8)
195
*out = o | d;
196
else if (bits <= 16)
197
put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
198
else if (bits <= 24)
199
put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
200
else if (bits <= 32)
201
put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
202
else if (bits <= 40)
203
put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
204
else if (bits <= 48)
205
put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
206
else if (bits <= 56)
207
put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
208
else
209
put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
210
211
p->bit += n;
212
213
if (p->bit > 7) {
214
p->out += p->bit / 8;
215
p->olen -= p->bit / 8;
216
p->bit %= 8;
217
}
218
219
return 0;
220
}
221
222
static int add_template(struct sw842_param *p, u8 c)
223
{
224
int ret, i, b = 0;
225
u8 *t = comp_ops[c];
226
bool inv = false;
227
228
if (c >= OPS_MAX)
229
return -EINVAL;
230
231
pr_debug("template %x\n", t[4]);
232
233
ret = add_bits(p, t[4], OP_BITS);
234
if (ret)
235
return ret;
236
237
for (i = 0; i < 4; i++) {
238
pr_debug("op %x\n", t[i]);
239
240
switch (t[i] & OP_AMOUNT) {
241
case OP_AMOUNT_8:
242
if (b)
243
inv = true;
244
else if (t[i] & OP_ACTION_INDEX)
245
ret = add_bits(p, p->index8[0], I8_BITS);
246
else if (t[i] & OP_ACTION_DATA)
247
ret = add_bits(p, p->data8[0], 64);
248
else
249
inv = true;
250
break;
251
case OP_AMOUNT_4:
252
if (b == 2 && t[i] & OP_ACTION_DATA)
253
ret = add_bits(p, get_input_data(p, 2, 32), 32);
254
else if (b != 0 && b != 4)
255
inv = true;
256
else if (t[i] & OP_ACTION_INDEX)
257
ret = add_bits(p, p->index4[b >> 2], I4_BITS);
258
else if (t[i] & OP_ACTION_DATA)
259
ret = add_bits(p, p->data4[b >> 2], 32);
260
else
261
inv = true;
262
break;
263
case OP_AMOUNT_2:
264
if (b != 0 && b != 2 && b != 4 && b != 6)
265
inv = true;
266
if (t[i] & OP_ACTION_INDEX)
267
ret = add_bits(p, p->index2[b >> 1], I2_BITS);
268
else if (t[i] & OP_ACTION_DATA)
269
ret = add_bits(p, p->data2[b >> 1], 16);
270
else
271
inv = true;
272
break;
273
case OP_AMOUNT_0:
274
inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
275
break;
276
default:
277
inv = true;
278
break;
279
}
280
281
if (ret)
282
return ret;
283
284
if (inv) {
285
pr_err("Invalid templ %x op %d : %x %x %x %x\n",
286
c, i, t[0], t[1], t[2], t[3]);
287
return -EINVAL;
288
}
289
290
b += t[i] & OP_AMOUNT;
291
}
292
293
if (b != 8) {
294
pr_err("Invalid template %x len %x : %x %x %x %x\n",
295
c, b, t[0], t[1], t[2], t[3]);
296
return -EINVAL;
297
}
298
299
if (sw842_template_counts)
300
atomic_inc(&template_count[t[4]]);
301
302
return 0;
303
}
304
305
static int add_repeat_template(struct sw842_param *p, u8 r)
306
{
307
int ret;
308
309
/* repeat param is 0-based */
310
if (!r || --r > REPEAT_BITS_MAX)
311
return -EINVAL;
312
313
ret = add_bits(p, OP_REPEAT, OP_BITS);
314
if (ret)
315
return ret;
316
317
ret = add_bits(p, r, REPEAT_BITS);
318
if (ret)
319
return ret;
320
321
if (sw842_template_counts)
322
atomic_inc(&template_repeat_count);
323
324
return 0;
325
}
326
327
static int add_short_data_template(struct sw842_param *p, u8 b)
328
{
329
int ret, i;
330
331
if (!b || b > SHORT_DATA_BITS_MAX)
332
return -EINVAL;
333
334
ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
335
if (ret)
336
return ret;
337
338
ret = add_bits(p, b, SHORT_DATA_BITS);
339
if (ret)
340
return ret;
341
342
for (i = 0; i < b; i++) {
343
ret = add_bits(p, p->in[i], 8);
344
if (ret)
345
return ret;
346
}
347
348
if (sw842_template_counts)
349
atomic_inc(&template_short_data_count);
350
351
return 0;
352
}
353
354
static int add_zeros_template(struct sw842_param *p)
355
{
356
int ret = add_bits(p, OP_ZEROS, OP_BITS);
357
358
if (ret)
359
return ret;
360
361
if (sw842_template_counts)
362
atomic_inc(&template_zeros_count);
363
364
return 0;
365
}
366
367
static int add_end_template(struct sw842_param *p)
368
{
369
int ret = add_bits(p, OP_END, OP_BITS);
370
371
if (ret)
372
return ret;
373
374
if (sw842_template_counts)
375
atomic_inc(&template_end_count);
376
377
return 0;
378
}
379
380
static bool check_template(struct sw842_param *p, u8 c)
381
{
382
u8 *t = comp_ops[c];
383
int i, match, b = 0;
384
385
if (c >= OPS_MAX)
386
return false;
387
388
for (i = 0; i < 4; i++) {
389
if (t[i] & OP_ACTION_INDEX) {
390
if (t[i] & OP_AMOUNT_2)
391
match = check_index(p, 2, b >> 1);
392
else if (t[i] & OP_AMOUNT_4)
393
match = check_index(p, 4, b >> 2);
394
else if (t[i] & OP_AMOUNT_8)
395
match = check_index(p, 8, 0);
396
else
397
return false;
398
if (!match)
399
return false;
400
}
401
402
b += t[i] & OP_AMOUNT;
403
}
404
405
return true;
406
}
407
408
static void get_next_data(struct sw842_param *p)
409
{
410
p->data8[0] = get_input_data(p, 0, 64);
411
p->data4[0] = get_input_data(p, 0, 32);
412
p->data4[1] = get_input_data(p, 4, 32);
413
p->data2[0] = get_input_data(p, 0, 16);
414
p->data2[1] = get_input_data(p, 2, 16);
415
p->data2[2] = get_input_data(p, 4, 16);
416
p->data2[3] = get_input_data(p, 6, 16);
417
}
418
419
/* update the hashtable entries.
420
* only call this after finding/adding the current template
421
* the dataN fields for the current 8 byte block must be already updated
422
*/
423
static void update_hashtables(struct sw842_param *p)
424
{
425
u64 pos = p->in - p->instart;
426
u64 n8 = (pos >> 3) % (1 << I8_BITS);
427
u64 n4 = (pos >> 2) % (1 << I4_BITS);
428
u64 n2 = (pos >> 1) % (1 << I2_BITS);
429
430
replace_hash(p, 8, n8, 0);
431
replace_hash(p, 4, n4, 0);
432
replace_hash(p, 4, n4, 1);
433
replace_hash(p, 2, n2, 0);
434
replace_hash(p, 2, n2, 1);
435
replace_hash(p, 2, n2, 2);
436
replace_hash(p, 2, n2, 3);
437
}
438
439
/* find the next template to use, and add it
440
* the p->dataN fields must already be set for the current 8 byte block
441
*/
442
static int process_next(struct sw842_param *p)
443
{
444
int ret, i;
445
446
p->index8[0] = INDEX_NOT_CHECKED;
447
p->index4[0] = INDEX_NOT_CHECKED;
448
p->index4[1] = INDEX_NOT_CHECKED;
449
p->index2[0] = INDEX_NOT_CHECKED;
450
p->index2[1] = INDEX_NOT_CHECKED;
451
p->index2[2] = INDEX_NOT_CHECKED;
452
p->index2[3] = INDEX_NOT_CHECKED;
453
454
/* check up to OPS_MAX - 1; last op is our fallback */
455
for (i = 0; i < OPS_MAX - 1; i++) {
456
if (check_template(p, i))
457
break;
458
}
459
460
ret = add_template(p, i);
461
if (ret)
462
return ret;
463
464
return 0;
465
}
466
467
/**
468
* sw842_compress
469
*
470
* Compress the uncompressed buffer of length @ilen at @in to the output buffer
471
* @out, using no more than @olen bytes, using the 842 compression format.
472
*
473
* Returns: 0 on success, error on failure. The @olen parameter
474
* will contain the number of output bytes written on success, or
475
* 0 on error.
476
*/
477
int sw842_compress(const u8 *in, unsigned int ilen,
478
u8 *out, unsigned int *olen, void *wmem)
479
{
480
struct sw842_param *p = (struct sw842_param *)wmem;
481
int ret;
482
u64 last, next, pad, total;
483
u8 repeat_count = 0;
484
u32 crc;
485
486
BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
487
488
init_hashtable_nodes(p, 8);
489
init_hashtable_nodes(p, 4);
490
init_hashtable_nodes(p, 2);
491
492
p->in = (u8 *)in;
493
p->instart = p->in;
494
p->ilen = ilen;
495
p->out = out;
496
p->olen = *olen;
497
p->bit = 0;
498
499
total = p->olen;
500
501
*olen = 0;
502
503
/* if using strict mode, we can only compress a multiple of 8 */
504
if (sw842_strict && (ilen % 8)) {
505
pr_err("Using strict mode, can't compress len %d\n", ilen);
506
return -EINVAL;
507
}
508
509
/* let's compress at least 8 bytes, mkay? */
510
if (unlikely(ilen < 8))
511
goto skip_comp;
512
513
/* make initial 'last' different so we don't match the first time */
514
last = ~get_unaligned((u64 *)p->in);
515
516
while (p->ilen > 7) {
517
next = get_unaligned((u64 *)p->in);
518
519
/* must get the next data, as we need to update the hashtable
520
* entries with the new data every time
521
*/
522
get_next_data(p);
523
524
/* we don't care about endianness in last or next;
525
* we're just comparing 8 bytes to another 8 bytes,
526
* they're both the same endianness
527
*/
528
if (next == last) {
529
/* repeat count bits are 0-based, so we stop at +1 */
530
if (++repeat_count <= REPEAT_BITS_MAX)
531
goto repeat;
532
}
533
if (repeat_count) {
534
ret = add_repeat_template(p, repeat_count);
535
if (ret)
536
return ret;
537
repeat_count = 0;
538
if (next == last) /* reached max repeat bits */
539
goto repeat;
540
}
541
542
if (next == 0)
543
ret = add_zeros_template(p);
544
else
545
ret = process_next(p);
546
547
if (ret)
548
return ret;
549
550
repeat:
551
last = next;
552
update_hashtables(p);
553
p->in += 8;
554
p->ilen -= 8;
555
}
556
557
if (repeat_count) {
558
ret = add_repeat_template(p, repeat_count);
559
if (ret)
560
return ret;
561
}
562
563
skip_comp:
564
if (p->ilen > 0) {
565
ret = add_short_data_template(p, p->ilen);
566
if (ret)
567
return ret;
568
569
p->in += p->ilen;
570
p->ilen = 0;
571
}
572
573
ret = add_end_template(p);
574
if (ret)
575
return ret;
576
577
/*
578
* crc(0:31) is appended to target data starting with the next
579
* bit after End of stream template.
580
* nx842 calculates CRC for data in big-endian format. So doing
581
* same here so that sw842 decompression can be used for both
582
* compressed data.
583
*/
584
crc = crc32_be(0, in, ilen);
585
ret = add_bits(p, crc, CRC_BITS);
586
if (ret)
587
return ret;
588
589
if (p->bit) {
590
p->out++;
591
p->olen--;
592
p->bit = 0;
593
}
594
595
/* pad compressed length to multiple of 8 */
596
pad = (8 - ((total - p->olen) % 8)) % 8;
597
if (pad) {
598
if (pad > p->olen) /* we were so close! */
599
return -ENOSPC;
600
memset(p->out, 0, pad);
601
p->out += pad;
602
p->olen -= pad;
603
}
604
605
if (unlikely((total - p->olen) > UINT_MAX))
606
return -ENOSPC;
607
608
*olen = total - p->olen;
609
610
return 0;
611
}
612
EXPORT_SYMBOL_GPL(sw842_compress);
613
614
static int __init sw842_init(void)
615
{
616
if (sw842_template_counts)
617
sw842_debugfs_create();
618
619
return 0;
620
}
621
module_init(sw842_init);
622
623
static void __exit sw842_exit(void)
624
{
625
if (sw842_template_counts)
626
sw842_debugfs_remove();
627
}
628
module_exit(sw842_exit);
629
630
MODULE_LICENSE("GPL");
631
MODULE_DESCRIPTION("Software 842 Compressor");
632
MODULE_AUTHOR("Dan Streetman <[email protected]>");
633
634