Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/statistics/primitive.rs
6940 views
1
use polars_parquet_format::Statistics as ParquetStatistics;
2
3
use crate::parquet::error::{ParquetError, ParquetResult};
4
use crate::parquet::schema::types::PrimitiveType;
5
use crate::parquet::types;
6
7
#[derive(Debug, Clone, PartialEq)]
8
pub struct PrimitiveStatistics<T: types::NativeType> {
9
pub primitive_type: PrimitiveType,
10
pub null_count: Option<i64>,
11
pub distinct_count: Option<i64>,
12
pub min_value: Option<T>,
13
pub max_value: Option<T>,
14
}
15
16
impl<T: types::NativeType> PrimitiveStatistics<T> {
17
pub fn deserialize(
18
v: &ParquetStatistics,
19
primitive_type: PrimitiveType,
20
) -> ParquetResult<Self> {
21
if v.max_value
22
.as_ref()
23
.is_some_and(|v| v.len() != size_of::<T>())
24
{
25
return Err(ParquetError::oos(
26
"The max_value of statistics MUST be plain encoded",
27
));
28
};
29
if v.min_value
30
.as_ref()
31
.is_some_and(|v| v.len() != size_of::<T>())
32
{
33
return Err(ParquetError::oos(
34
"The min_value of statistics MUST be plain encoded",
35
));
36
};
37
38
Ok(Self {
39
primitive_type,
40
null_count: v.null_count,
41
distinct_count: v.distinct_count,
42
max_value: v.max_value.as_ref().map(|x| types::decode(x)),
43
min_value: v.min_value.as_ref().map(|x| types::decode(x)),
44
})
45
}
46
47
pub fn serialize(&self) -> ParquetStatistics {
48
ParquetStatistics {
49
null_count: self.null_count,
50
distinct_count: self.distinct_count,
51
max_value: self.max_value.map(|x| x.to_le_bytes().as_ref().to_vec()),
52
min_value: self.min_value.map(|x| x.to_le_bytes().as_ref().to_vec()),
53
max: None,
54
min: None,
55
is_max_value_exact: None,
56
is_min_value_exact: None,
57
}
58
}
59
}
60
61