CoCalc -- drbd

GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/block/drbd/drbd_vli.h
¹⁷⁴⁹⁵ views
1
/*
2
-*- linux-c -*-
3
   drbd_vli.h
4
   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5

6
   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7
   Copyright (C) 1999-2008, Philipp Reisner <[email protected]>.
8
   Copyright (C) 2002-2008, Lars Ellenberg <[email protected]>.
9

10
   drbd is free software; you can redistribute it and/or modify
11
   it under the terms of the GNU General Public License as published by
12
   the Free Software Foundation; either version 2, or (at your option)
13
   any later version.
14

15
   drbd is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
   GNU General Public License for more details.
19

20
   You should have received a copy of the GNU General Public License
21
   along with drbd; see the file COPYING.  If not, write to
22
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
 */
24

25
#ifndef _DRBD_VLI_H
26
#define _DRBD_VLI_H
27

28
/*
29
 * At a granularity of 4KiB storage represented per bit,
30
 * and storage sizes of several TiB,
31
 * and possibly small-bandwidth replication,
32
 * the bitmap transfer time can take much too long,
33
 * if transmitted in plain text.
34
 *
35
 * We try to reduce the transferred bitmap information
36
 * by encoding runlengths of bit polarity.
37
 *
38
 * We never actually need to encode a "zero" (runlengths are positive).
39
 * But then we have to store the value of the first bit.
40
 * The first bit of information thus shall encode if the first runlength
41
 * gives the number of set or unset bits.
42
 *
43
 * We assume that large areas are either completely set or unset,
44
 * which gives good compression with any runlength method,
45
 * even when encoding the runlength as fixed size 32bit/64bit integers.
46
 *
47
 * Still, there may be areas where the polarity flips every few bits,
48
 * and encoding the runlength sequence of those areas with fixed size
49
 * integers would be much worse than plaintext.
50
 *
51
 * We want to encode small runlength values with minimum code length,
52
 * while still being able to encode a Huge run of all zeros.
53
 *
54
 * Thus we need a Variable Length Integer encoding, VLI.
55
 *
56
 * For some cases, we produce more code bits than plaintext input.
57
 * We need to send incompressible chunks as plaintext, skip over them
58
 * and then see if the next chunk compresses better.
59
 *
60
 * We don't care too much about "excellent" compression ratio for large
61
 * runlengths (all set/all clear): whether we achieve a factor of 100
62
 * or 1000 is not that much of an issue.
63
 * We do not want to waste too much on short runlengths in the "noisy"
64
 * parts of the bitmap, though.
65
 *
66
 * There are endless variants of VLI, we experimented with:
67
 *  * simple byte-based
68
 *  * various bit based with different code word length.
69
 *
70
 * To avoid yet another configuration parameter (choice of bitmap compression
71
 * algorithm) which was difficult to explain and tune, we just chose the one
72
 * variant that turned out best in all test cases.
73
 * Based on real world usage patterns, with device sizes ranging from a few GiB
74
 * to several TiB, file server/mailserver/webserver/mysql/postgres,
75
 * mostly idle to really busy, the all time winner (though sometimes only
76
 * marginally better) is:
77
 */
78

79
/*
80
 * encoding is "visualised" as
81
 * __little endian__ bitstream, least significant bit first (left most)
82
 *
83
 * this particular encoding is chosen so that the prefix code
84
 * starts as unary encoding the level, then modified so that
85
 * 10 levels can be described in 8bit, with minimal overhead
86
 * for the smaller levels.
87
 *
88
 * Number of data bits follow fibonacci sequence, with the exception of the
89
 * last level (+1 data bit, so it makes 64bit total).  The only worse code when
90
 * encoding bit polarity runlength is 1 plain bit => 2 code bits.
91
prefix    data bits                                    max val  Nº data bits
92
0 x                                                         0x2            1
93
10 x                                                        0x4            1
94
110 xx                                                      0x8            2
95
1110 xxx                                                   0x10            3
96
11110 xxx xx                                               0x30            5
97
111110 xx xxxxxx                                          0x130            8
98
11111100  xxxxxxxx xxxxx                                 0x2130           13
99
11111110  xxxxxxxx xxxxxxxx xxxxx                      0x202130           21
100
11111101  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xx   0x400202130           34
101
11111111  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
102
 * maximum encodable value: 0x100000400202130 == 2**56 + some */
103

104
/* compression "table":
105
 transmitted   x                                0.29
106
 as plaintext x                                  ........................
107
             x                                   ........................
108
            x                                    ........................
109
           x    0.59                         0.21........................
110
          x      ........................................................
111
         x       .. c ...................................................
112
        x    0.44.. o ...................................................
113
       x .......... d ...................................................
114
      x  .......... e ...................................................
115
     X.............   ...................................................
116
    x.............. b ...................................................
117
2.0x............... i ...................................................
118
 #X................ t ...................................................
119
 #................. s ...........................  plain bits  ..........
120
-+-----------------------------------------------------------------------
121
 1             16              32                              64
122
*/
123

124
/* LEVEL: (total bits, prefix bits, prefix value),
125
 * sorted ascending by number of total bits.
126
 * The rest of the code table is calculated at compiletime from this. */
127

128
/*
 * The code table: one LEVEL(total bits, prefix bits, prefix value) entry
 * per code word length, sorted ascending by total bits.
 *
 * Data bits per level (total - prefix) are 1, 1, 2, 3, 5, 8, 13, 21, 34,
 * i.e. a fibonacci sequence, and 56 for the last level, so that the
 * longest code word is exactly 64 bit.
 *
 * LEVEL(t,b,v) has to be #defined by the user before expanding this.
 */
#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00);	/*  1 data bit  */ \
	LEVEL( 3, 2, 0x01);	/*  1 data bit  */ \
	LEVEL( 5, 3, 0x03);	/*  2 data bits */ \
	LEVEL( 7, 4, 0x07);	/*  3 data bits */ \
	LEVEL(10, 5, 0x0f);	/*  5 data bits */ \
	LEVEL(14, 6, 0x1f);	/*  8 data bits */ \
	LEVEL(21, 8, 0x3f);	/* 13 data bits */ \
	LEVEL(29, 8, 0x7f);	/* 21 data bits */ \
	LEVEL(42, 8, 0xbf);	/* 34 data bits */ \
	LEVEL(64, 8, 0xff);	/* 56 data bits */ \
	} while (0)
141

142
/* finds a suitable level to decode the least significant part of in.
143
 * returns number of bits consumed.
144
 *
145
 * BUG() for bad input, as that would mean a buggy code table. */
146
static inline int vli_decode_bits(u64 *out, const u64 in)
147
{
148
	u64 adj = 1;
149

150
#define LEVEL(t,b,v)					\
151
	do {						\
152
		if ((in & ((1 << b) -1)) == v) {	\
153
			*out = ((in & ((~0ULL) >> (64-t))) >> b) + adj;	\
154
			return t;			\
155
		}					\
156
		adj += 1ULL << (t - b);			\
157
	} while (0)
158

159
	VLI_L_1_1();
160

161
	/* NOT REACHED, if VLI_LEVELS code table is defined properly */
162
	BUG();
163
#undef LEVEL
164
}
165

166
/* return number of code bits needed,
167
 * or negative error number */
168
static inline int __vli_encode_bits(u64 *out, const u64 in)
169
{
170
	u64 max = 0;
171
	u64 adj = 1;
172

173
	if (in == 0)
174
		return -EINVAL;
175

176
#define LEVEL(t,b,v) do {		\
177
		max += 1ULL << (t - b);	\
178
		if (in <= max) {	\
179
			if (out)	\
180
				*out = ((in - adj) << b) | v;	\
181
			return t;	\
182
		}			\
183
		adj = max + 1;		\
184
	} while (0)
185

186
	VLI_L_1_1();
187

188
	return -EOVERFLOW;
189
#undef LEVEL
190
}
191

192
#undef VLI_L_1_1
193

194
/* code from here down is independent of the actually used bit code */
195

196
/*
197
 * Code length is determined by some unique (e.g. unary) prefix.
198
 * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
199
 * not a byte stream.
200
 */
201

202
/* for the bitstream, we need a cursor */
203
struct bitstream_cursor {
204
	/* the current byte */
205
	u8 *b;
206
	/* the current bit within *b, nomalized: 0..7 */
207
	unsigned int bit;
208
};
209

210
/* initialize cursor to point to first bit of stream */
211
static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
212
{
213
	cur->b = s;
214
	cur->bit = 0;
215
}
216

217
/* advance cursor by that many bits; maximum expected input value: 64,
218
 * but depending on VLI implementation, it may be more. */
219
static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
220
{
221
	bits += cur->bit;
222
	cur->b = cur->b + (bits >> 3);
223
	cur->bit = bits & 7;
224
}
225

226
/* the bitstream itself knows its length */
227
struct bitstream {
228
	struct bitstream_cursor cur;
229
	unsigned char *buf;
230
	size_t buf_len;		/* in bytes */
231

232
	/* for input stream:
233
	 * number of trailing 0 bits for padding
234
	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
235
	unsigned int pad_bits;
236
};
237

238
static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
239
{
240
	bs->buf = s;
241
	bs->buf_len = len;
242
	bs->pad_bits = pad_bits;
243
	bitstream_cursor_reset(&bs->cur, bs->buf);
244
}
245

246
static inline void bitstream_rewind(struct bitstream *bs)
247
{
248
	bitstream_cursor_reset(&bs->cur, bs->buf);
249
	memset(bs->buf, 0, bs->buf_len);
250
}
251

252
/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
253
 * Ignores "pad_bits".
254
 * Returns zero if bits == 0 (nothing to do).
255
 * Returns number of bits used if successful.
256
 *
257
 * If there is not enough room left in bitstream,
258
 * leaves bitstream unchanged and returns -ENOBUFS.
259
 */
260
static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
261
{
262
	unsigned char *b = bs->cur.b;
263
	unsigned int tmp;
264

265
	if (bits == 0)
266
		return 0;
267

268
	if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
269
		return -ENOBUFS;
270

271
	/* paranoia: strip off hi bits; they should not be set anyways. */
272
	if (bits < 64)
273
		val &= ~0ULL >> (64 - bits);
274

275
	*b++ |= (val & 0xff) << bs->cur.bit;
276

277
	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
278
		*b++ |= (val >> tmp) & 0xff;
279

280
	bitstream_cursor_advance(&bs->cur, bits);
281
	return bits;
282
}
283

284
/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
285
 *
286
 * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
287
 *
288
 * If there are less than the requested number of valid bits left in the
289
 * bitstream, still fetches all available bits.
290
 *
291
 * Returns number of actually fetched bits.
292
 */
293
static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
294
{
295
	u64 val;
296
	unsigned int n;
297

298
	if (bits > 64)
299
		return -EINVAL;
300

301
	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
302
		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
303
			- bs->cur.bit - bs->pad_bits;
304

305
	if (bits == 0) {
306
		*out = 0;
307
		return 0;
308
	}
309

310
	/* get the high bits */
311
	val = 0;
312
	n = (bs->cur.bit + bits + 7) >> 3;
313
	/* n may be at most 9, if cur.bit + bits > 64 */
314
	/* which means this copies at most 8 byte */
315
	if (n) {
316
		memcpy(&val, bs->cur.b+1, n - 1);
317
		val = le64_to_cpu(val) << (8 - bs->cur.bit);
318
	}
319

320
	/* we still need the low bits */
321
	val |= bs->cur.b[0] >> bs->cur.bit;
322

323
	/* and mask out bits we don't want */
324
	val &= ~0ULL >> (64 - bits);
325

326
	bitstream_cursor_advance(&bs->cur, bits);
327
	*out = val;
328

329
	return bits;
330
}
331

332
/* encodes @in as vli into @bs;
333

334
 * return values
335
 *  > 0: number of bits successfully stored in bitstream
336
 * -ENOBUFS @bs is full
337
 * -EINVAL input zero (invalid)
338
 * -EOVERFLOW input too large for this vli code (invalid)
339
 */
340
static inline int vli_encode_bits(struct bitstream *bs, u64 in)
341
{
342
	u64 code = code;
343
	int bits = __vli_encode_bits(&code, in);
344

345
	if (bits <= 0)
346
		return bits;
347

348
	return bitstream_put_bits(bs, code, bits);
349
}
350

351
#endif
352

353
Product

Resources

Company