Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs
7887 views
1
mod decoder;
2
mod encoder;
3
4
pub(crate) use decoder::{Decoder, SumGatherer};
5
pub(crate) use encoder::encode;
6
7
/// Returns the sum of the `len`-term arithmetic progression
/// `start, start + delta, start + 2 * delta, ...`.
///
/// The result is computed in closed form as
/// `start * len + delta * (len * (len - 1) / 2)`; an empty progression
/// (`len == 0`) sums to `0`.
///
/// The arithmetic uses the plain `*` and `+` operators, so an overflowing
/// intermediate panics when overflow checks are enabled and wraps otherwise.
pub(crate) fn lin_natural_sum(start: i64, delta: i64, len: usize) -> i64 {
    debug_assert!(len < i64::MAX as usize);

    // Contribution of the constant term: `start` repeated `len` times.
    let base = start * len as i64;

    // Contribution of the steps: delta * (0 + 1 + ... + (len - 1)).
    let steps = match len {
        0 => 0,
        n => {
            // 0 + 1 + ... + (n - 1) = n * (n - 1) / 2. Exactly one of `n` and `n - 1` is
            // even; halving that factor before multiplying keeps the division exact.
            let (lhs, rhs) = if n % 2 == 0 {
                (n / 2, n - 1)
            } else {
                (n, (n - 1) / 2)
            };
            delta * (lhs * rhs) as i64
        },
    };

    base + steps
}
23
24
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parquet::error::{ParquetError, ParquetResult};

    /// Encodes `data`, decodes the resulting buffer again and asserts that the decoded
    /// values equal the input.
    ///
    /// `encode` is called with `1` as its last argument, which is the value every
    /// round-trip test in this module uses.
    ///
    /// `#[track_caller]` makes a failing assertion report the calling test's location
    /// instead of this helper's.
    #[track_caller]
    fn assert_roundtrip(data: Vec<i64>) -> Result<(), ParquetError> {
        let mut encoded = vec![];
        encode(data.clone().into_iter(), &mut encoded, 1);

        // The second tuple element returned by `try_new` is not needed here.
        let (decoder, _) = Decoder::try_new(&encoded)?;
        let decoded = decoder.collect::<Vec<_>>()?;

        assert_eq!(decoded, data);
        Ok(())
    }

    #[test]
    fn linear_natural_sum() {
        // Empty progressions sum to zero regardless of `start` and `delta`.
        assert_eq!(lin_natural_sum(0, 0, 0), 0);
        assert_eq!(lin_natural_sum(10, 4, 0), 0);

        // Unit step starting at zero: 0, 0 + 1 + 2, 0 + 1 + 2 + 3.
        assert_eq!(lin_natural_sum(0, 1, 1), 0);
        assert_eq!(lin_natural_sum(0, 1, 3), 3);
        assert_eq!(lin_natural_sum(0, 1, 4), 6);

        // Non-unit step, without and with a non-zero start.
        assert_eq!(lin_natural_sum(0, 2, 3), 6);
        assert_eq!(lin_natural_sum(2, 2, 3), 12);
    }

    #[test]
    fn basic() -> Result<(), ParquetError> {
        assert_roundtrip(vec![1, 3, 1, 2, 3])
    }

    #[test]
    fn negative_value() -> Result<(), ParquetError> {
        assert_roundtrip(vec![1, 3, -1, 2, 3])
    }

    #[test]
    fn some() -> Result<(), ParquetError> {
        assert_roundtrip(vec![
            -2147483648,
            -1777158217,
            -984917788,
            -1533539476,
            -731221386,
            -1322398478,
            906736096,
        ])
    }

    #[test]
    fn more_than_one_block() -> Result<(), ParquetError> {
        let mut data = vec![1, 3, -1, 2, 3, 10, 1];
        // Append 128 further values: -10, -9, ..., 117.
        data.extend((0..128).map(|x| x - 10));

        assert_roundtrip(data)
    }

    #[test]
    fn test_another() -> Result<(), ParquetError> {
        assert_roundtrip(vec![2, 3, 1, 2, 1])
    }

    #[test]
    fn overflow_constant() -> ParquetResult<()> {
        // Consecutive differences between these values do not fit in an `i64`.
        assert_roundtrip(vec![i64::MIN, i64::MAX, i64::MIN, i64::MAX])
    }

    #[test]
    fn overflow_vary() -> ParquetResult<()> {
        assert_roundtrip(vec![
            0,
            i64::MAX,
            i64::MAX - 1,
            i64::MIN + 1,
            i64::MAX,
            i64::MIN + 2,
        ])
    }
}
152
153