Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/encoding/delta_byte_array/encoder.rs
7887 views
1
use super::super::delta_bitpacked;
2
use crate::parquet::encoding::delta_length_byte_array;
3
4
/// Encodes an iterator of according to DELTA_BYTE_ARRAY
5
pub fn encode<'a, I: ExactSizeIterator<Item = &'a [u8]> + Clone>(
6
iterator: I,
7
buffer: &mut Vec<u8>,
8
) {
9
let mut previous = b"".as_ref();
10
11
let mut sum_lengths = 0;
12
let prefixes = iterator
13
.clone()
14
.map(|item| {
15
let prefix_length = item
16
.iter()
17
.zip(previous.iter())
18
.enumerate()
19
// find first difference
20
.find_map(|(length, (lhs, rhs))| (lhs != rhs).then_some(length))
21
.unwrap_or(previous.len());
22
previous = item;
23
24
sum_lengths += item.len() - prefix_length;
25
prefix_length as i64
26
})
27
.collect::<Vec<_>>();
28
delta_bitpacked::encode(prefixes.iter().copied(), buffer, 1);
29
30
let remaining = iterator
31
.zip(prefixes)
32
.map(|(item, prefix)| &item[prefix as usize..]);
33
34
delta_length_byte_array::encode(remaining, buffer);
35
}
36
37