Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/arrow/read/expr.rs
8430 views
1
use std::sync::Arc;
2
3
use arrow::array::Array;
4
use arrow::bitmap::{Bitmap, BitmapBuilder};
5
use arrow::types::AlignedBytes;
6
7
/// A single scalar value tagged with its type.
///
/// Each variant carries one payload of the named type; [`ParquetScalar::Null`]
/// carries none. Variable-length payloads are stored as boxed slices / boxed
/// `str`, so a scalar owns its data.
#[derive(Debug, Clone)]
pub enum ParquetScalar {
    /// No value.
    Null,

    /// A `bool` payload.
    Boolean(bool),

    /// An `i8` payload.
    Int8(i8),
    /// An `i16` payload.
    Int16(i16),
    /// An `i32` payload.
    Int32(i32),
    /// An `i64` payload.
    Int64(i64),
    /// A `u8` payload.
    UInt8(u8),
    /// A `u16` payload.
    UInt16(u16),
    /// A `u32` payload.
    UInt32(u32),
    /// A `u64` payload.
    UInt64(u64),

    /// An `f32` payload.
    Float32(f32),
    /// An `f64` payload.
    Float64(f64),

    /// An owned byte payload.
    // NOTE(review): presumably the length matches the column's fixed width;
    // nothing in this type enforces it.
    FixedSizeBinary(Box<[u8]>),

    /// An owned UTF-8 string payload.
    String(Box<str>),
    /// An owned byte payload.
    Binary(Box<[u8]>),
}
30
31
impl ParquetScalar {
32
pub(crate) fn is_null(&self) -> bool {
33
matches!(self, Self::Null)
34
}
35
36
pub(crate) fn to_aligned_bytes<B: AlignedBytes>(&self) -> Option<B> {
37
match self {
38
Self::Int8(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
39
.ok()
40
.map(B::from_unaligned),
41
Self::Int16(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
42
.ok()
43
.map(B::from_unaligned),
44
Self::Int32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
45
.ok()
46
.map(B::from_unaligned),
47
Self::Int64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
48
.ok()
49
.map(B::from_unaligned),
50
Self::UInt8(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
51
.ok()
52
.map(B::from_unaligned),
53
Self::UInt16(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
54
.ok()
55
.map(B::from_unaligned),
56
Self::UInt32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
57
.ok()
58
.map(B::from_unaligned),
59
Self::UInt64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
60
.ok()
61
.map(B::from_unaligned),
62
Self::Float32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
63
.ok()
64
.map(B::from_unaligned),
65
Self::Float64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
66
.ok()
67
.map(B::from_unaligned),
68
_ => None,
69
}
70
}
71
72
pub(crate) fn as_str(&self) -> Option<&str> {
73
match self {
74
Self::String(s) => Some(s.as_ref()),
75
_ => None,
76
}
77
}
78
79
pub(crate) fn as_binary(&self) -> Option<&[u8]> {
80
match self {
81
Self::Binary(s) => Some(s.as_ref()),
82
_ => None,
83
}
84
}
85
86
pub(crate) fn as_bool(&self) -> Option<bool> {
87
match self {
88
Self::Boolean(s) => Some(*s),
89
_ => None,
90
}
91
}
92
}
93
94
#[derive(Clone)]
95
pub enum SpecializedParquetColumnExpr {
96
Equal(ParquetScalar),
97
Between(ParquetScalar, ParquetScalar),
98
EqualOneOf(Box<[ParquetScalar]>),
99
StartsWith(Box<[u8]>),
100
EndsWith(Box<[u8]>),
101
RegexMatch(regex::bytes::Regex),
102
}
103
104
pub type ParquetColumnExprRef = Arc<dyn ParquetColumnExpr>;
105
pub trait ParquetColumnExpr: Send + Sync {
106
fn evaluate(&self, values: &dyn Array) -> Bitmap {
107
let mut bm = BitmapBuilder::new();
108
self.evaluate_mut(values, &mut bm);
109
bm.freeze()
110
}
111
fn evaluate_mut(&self, values: &dyn Array, bm: &mut BitmapBuilder);
112
fn evaluate_null(&self) -> bool;
113
114
fn as_specialized(&self) -> Option<&SpecializedParquetColumnExpr>;
115
}
116
117