// Book a Demo!
// CoCalc Logo Icon
// Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
// pola-rs
// GitHub Repository: pola-rs/polars
// Path: blob/main/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs
// 7887 views
1
/// Expands `$block` once for every integer of a fixed range, binding `$i` to that
/// integer as a `const usize`.
///
/// Because the index is a `const`, it can be used wherever a compile-time constant
/// is required (for example as an array length or a const-generic argument).
///
/// `macro_rules!` cannot iterate over a numeric range, so every supported range is
/// spelled out as its own arm that forwards the explicit list of literals to the
/// final `[...]` arm. Supported forms are `1..15`, `0..16`, `0..=16`, `1..31`,
/// `0..32`, `0..=32`, `1..63`, `0..64`, `0..=64`, and an explicit literal list
/// `[a, b, c]`. Any other range fails to match at compile time.
macro_rules! seq_macro {
    ($i:ident in 1..15 $block:block) => {
        seq_macro!($i in [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        ] $block)
    };
    ($i:ident in 0..16 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        ] $block)
    };
    ($i:ident in 0..=16 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16,
        ] $block)
    };
    ($i:ident in 1..31 $block:block) => {
        seq_macro!($i in [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
        ] $block)
    };
    ($i:ident in 0..32 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        ] $block)
    };
    ($i:ident in 0..=32 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32,
        ] $block)
    };
    ($i:ident in 1..63 $block:block) => {
        seq_macro!($i in [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
        ] $block)
    };
    ($i:ident in 0..64 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        ] $block)
    };
    ($i:ident in 0..=64 $block:block) => {
        seq_macro!($i in [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            64,
        ] $block)
    };
    // Base case: one scoped copy of the block per literal. The outer braces give
    // each `const $i` its own scope so the repeated declarations do not clash.
    ($i:ident in [$($value:literal),+ $(,)?] $block:block) => {
        $({
            // The caller picks the index name (typically lowercase, e.g. `i`).
            #[allow(non_upper_case_globals)]
            const $i: usize = $value;
            { $block }
        })+
    };
}
70
71
mod decode;
72
mod encode;
73
mod pack;
74
mod unpack;
75
76
pub use decode::{ChunkedDecoder, Decoder};
77
pub use encode::{encode, encode_pack};
78
79
/// A fixed-size byte array (e.g. `[u8; 8]`) denoting types that represent complete packs.
pub trait Packed:
    Copy
    + Sized
    + AsRef<[u8]>
    + AsMut<[u8]>
    + std::ops::IndexMut<usize, Output = u8>
    + for<'a> TryFrom<&'a [u8]>
{
    /// Number of bytes in the array.
    const LENGTH: usize;

    /// Returns an array with every byte set to zero.
    fn zero() -> Self;
}

/// 8-byte pack.
impl Packed for [u8; 8] {
    const LENGTH: usize = 8;

    #[inline]
    fn zero() -> Self {
        [0; 8]
    }
}

/// 32-byte pack (`16 * 2`); this is `<u16 as Unpackable>::Packed`.
impl Packed for [u8; 16 * 2] {
    const LENGTH: usize = 16 * 2;

    #[inline]
    fn zero() -> Self {
        [0; 16 * 2]
    }
}

/// 128-byte pack (`32 * 4`); this is `<u32 as Unpackable>::Packed`.
impl Packed for [u8; 32 * 4] {
    const LENGTH: usize = 32 * 4;

    #[inline]
    fn zero() -> Self {
        [0; 32 * 4]
    }
}

/// 512-byte pack (`64 * 8`); this is `<u64 as Unpackable>::Packed`.
impl Packed for [u8; 64 * 8] {
    const LENGTH: usize = 64 * 8;

    #[inline]
    fn zero() -> Self {
        [0; 64 * 8]
    }
}
123
124
/// A fixed-size array of [`Unpackable`] values denoting complete unpacked arrays.
pub trait Unpacked<T>:
    Copy
    + Sized
    + AsRef<[T]>
    + AsMut<[T]>
    + std::ops::Index<usize, Output = T>
    + std::ops::IndexMut<usize, Output = T>
    + for<'a> TryFrom<&'a [T], Error = std::array::TryFromSliceError>
{
    /// Number of elements in the array.
    const LENGTH: usize;

    /// Returns an array with every element set to zero.
    fn zero() -> Self;
}

/// Array of 8 `u8` values.
impl Unpacked<u8> for [u8; 8] {
    const LENGTH: usize = 8;

    #[inline]
    fn zero() -> Self {
        [0; 8]
    }
}

/// Array of 16 `u16` values.
impl Unpacked<u16> for [u16; 16] {
    const LENGTH: usize = 16;

    #[inline]
    fn zero() -> Self {
        [0; 16]
    }
}

/// Array of 32 `u32` values.
impl Unpacked<u32> for [u32; 32] {
    const LENGTH: usize = 32;

    #[inline]
    fn zero() -> Self {
        [0; 32]
    }
}

/// Array of 64 `u64` values.
impl Unpacked<u64> for [u64; 64] {
    const LENGTH: usize = 64;

    #[inline]
    fn zero() -> Self {
        [0; 64]
    }
}
169
170
/// A type representing a type that can be bitpacked and unpacked by this crate.
171
pub trait Unpackable: Copy + Sized + Default {
172
type Packed: Packed;
173
type Unpacked: Unpacked<Self>;
174
175
fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked);
176
fn pack(unpacked: &Self::Unpacked, num_bits: usize, packed: &mut [u8]);
177
}
178
179
impl Unpackable for u16 {
180
type Packed = [u8; 16 * 2];
181
type Unpacked = [u16; 16];
182
183
#[inline]
184
fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
185
unpack::unpack16(packed, unpacked, num_bits)
186
}
187
188
#[inline]
189
fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
190
pack::pack16(packed, unpacked, num_bits)
191
}
192
}
193
194
impl Unpackable for u32 {
195
type Packed = [u8; 32 * 4];
196
type Unpacked = [u32; 32];
197
198
#[inline]
199
fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
200
unpack::unpack32(packed, unpacked, num_bits)
201
}
202
203
#[inline]
204
fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
205
pack::pack32(packed, unpacked, num_bits)
206
}
207
}
208
209
impl Unpackable for u64 {
210
type Packed = [u8; 64 * 8];
211
type Unpacked = [u64; 64];
212
213
#[inline]
214
fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
215
unpack::unpack64(packed, unpacked, num_bits)
216
}
217
218
#[inline]
219
fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
220
pack::pack64(packed, unpacked, num_bits)
221
}
222
}
223
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture returning `(num_bits, decompressed, compressed)`.
    ///
    /// The values `0..=7` at 3 bits each occupy 3 bytes; that group is repeated
    /// five times, giving 40 values and 15 compressed bytes.
    pub fn case1() -> (usize, Vec<u32>, Vec<u8>) {
        let num_bits = 3;
        let compressed = vec![
            0b10001000u8,
            0b11000110,
            0b11111010,
            0b10001000u8,
            0b11000110,
            0b11111010,
            0b10001000u8,
            0b11000110,
            0b11111010,
            0b10001000u8,
            0b11000110,
            0b11111010,
            0b10001000u8,
            0b11000110,
            0b11111010,
        ];
        let decompressed = vec![
            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4,
            5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
        ];
        (num_bits, decompressed, compressed)
    }

    /// Encodes the 40-value fixture and checks the first 15 output bytes.
    #[test]
    fn encode_large() {
        let (num_bits, unpacked, expected) = case1();
        let mut packed = vec![0u8; 4 * 32];

        encode(&unpacked, num_bits, &mut packed);
        assert_eq!(&packed[..15], expected);
    }

    /// Encodes a single group of eight 3-bit values and checks the first 3 output bytes.
    #[test]
    fn test_encode() {
        let num_bits = 3;
        let unpacked = vec![0, 1, 2, 3, 4, 5, 6, 7];

        let mut packed = vec![0u8; 4 * 32];

        encode::<u32>(&unpacked, num_bits, &mut packed);

        let expected = vec![0b10001000u8, 0b11000110, 0b11111010];

        assert_eq!(&packed[..3], expected);
    }
}
277
278