Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/arrow/read/deserialize/null.rs
6940 views
1
//! This implements the [`Decoder`][utils::Decoder] trait for the `UNKNOWN` or `Null` nested type.
2
//! The implementation mostly stubs out all the functions and just keeps track of the length in the
3
//! `DecodedState`.
4
5
use arrow::array::NullArray;
6
use arrow::bitmap::{Bitmap, BitmapBuilder};
7
use arrow::datatypes::ArrowDataType;
8
9
use super::PredicateFilter;
10
use super::utils::filter::Filter;
11
use super::utils::{self};
12
use crate::parquet::error::ParquetResult;
13
use crate::parquet::page::{DataPage, DictPage};
14
15
/// Field-less marker type on which the decoder trait for the `Null` type is implemented.
pub(crate) struct NullDecoder;
16
/// Per-page state for the null decoder.
///
/// Only a row count is stored; no value data is kept.
pub(crate) struct NullTranslation {
    /// Number of rows attributed to the page this translation was built from.
    num_rows: usize,
}
19
20
/// Accumulated decode state for a null column.
///
/// A running length is the only thing tracked.
#[derive(Debug)]
pub(crate) struct NullArrayLength {
    /// Total number of rows accumulated so far.
    length: usize,
}
24
25
impl utils::Decoded for NullArrayLength {
    /// Returns the number of rows accumulated so far.
    fn len(&self) -> usize {
        self.length
    }

    /// Accounts for `n` additional null rows by growing the stored length.
    fn extend_nulls(&mut self, n: usize) {
        self.length += n;
    }
}
33
34
impl<'a> utils::StateTranslation<'a, NullDecoder> for NullTranslation {
35
type PlainDecoder = ();
36
37
fn new(
38
_decoder: &NullDecoder,
39
page: &'a DataPage,
40
_dict: Option<&'a <NullDecoder as utils::Decoder>::Dict>,
41
_page_validity: Option<&Bitmap>,
42
) -> ParquetResult<Self> {
43
Ok(NullTranslation {
44
num_rows: page.num_values(),
45
})
46
}
47
fn num_rows(&self) -> usize {
48
self.num_rows
49
}
50
}
51
52
impl utils::Decoder for NullDecoder {
53
type Translation<'a> = NullTranslation;
54
type Dict = NullArray;
55
type DecodedState = NullArrayLength;
56
type Output = NullArray;
57
58
/// Initializes a new state
59
fn with_capacity(&self, _: usize) -> Self::DecodedState {
60
NullArrayLength { length: 0 }
61
}
62
63
fn deserialize_dict(&mut self, _: DictPage) -> ParquetResult<Self::Dict> {
64
Ok(NullArray::new_empty(ArrowDataType::Null))
65
}
66
67
fn has_predicate_specialization(
68
&self,
69
_state: &utils::State<'_, Self>,
70
_predicate: &PredicateFilter,
71
) -> ParquetResult<bool> {
72
// @TODO: This can be enabled for the fast paths
73
Ok(false)
74
}
75
76
fn extend_decoded(
77
&self,
78
decoded: &mut Self::DecodedState,
79
additional: &dyn arrow::array::Array,
80
_is_optional: bool,
81
) -> ParquetResult<()> {
82
let additional = additional.as_any().downcast_ref::<NullArray>().unwrap();
83
decoded.length += additional.len();
84
85
Ok(())
86
}
87
88
fn finalize(
89
&self,
90
dtype: ArrowDataType,
91
_dict: Option<Self::Dict>,
92
decoded: Self::DecodedState,
93
) -> ParquetResult<Self::Output> {
94
Ok(NullArray::new(dtype, decoded.length))
95
}
96
97
fn extend_filtered_with_state(
98
&mut self,
99
state: utils::State<'_, Self>,
100
decoded: &mut Self::DecodedState,
101
_pred_true_mask: &mut BitmapBuilder,
102
filter: Option<Filter>,
103
) -> ParquetResult<()> {
104
if matches!(filter, Some(Filter::Predicate(_))) {
105
todo!()
106
}
107
108
let num_rows = match filter {
109
Some(f) => f.num_rows(0),
110
None => state.translation.num_rows,
111
};
112
decoded.length += num_rows;
113
114
Ok(())
115
}
116
}
117
118