Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-stream/src/nodes/io_sinks2/components/partition_key.rs
7884 views
1
use arrow::array::{BinaryViewArray, FixedSizeBinaryArray, PrimitiveArray};
2
use arrow::buffer::Buffer;
3
use arrow::datatypes::ArrowDataType;
4
use polars_core::prelude::{Column, DataType, LargeBinaryArray};
5
use polars_core::with_match_physical_integer_type;
6
7
/// Byte-string key produced for a single row by [`PreComputedKeys::get_key`].
///
/// Alias for `polars_utils::small_bytes::SmallBytes`.
pub type PartitionKey = polars_utils::small_bytes::SmallBytes;
8
9
pub enum PreComputedKeys {
10
Binview(BinaryViewArray),
11
Primitive(FixedSizeBinaryArray),
12
RowEncoded(LargeBinaryArray),
13
}
14
15
impl PreComputedKeys {
16
#[expect(unused)]
17
pub fn name(&self) -> &'static str {
18
match self {
19
Self::Binview(_) => "Binview",
20
Self::Primitive(_) => "Primitive",
21
Self::RowEncoded(_) => "RowEncoded",
22
}
23
}
24
25
pub fn opt_new_non_encoded(column: &Column) -> Option<Self> {
26
Some(match column.dtype() {
27
DataType::Binary => Self::Binview(
28
column
29
.binary()
30
.unwrap()
31
.rechunk()
32
.downcast_as_array()
33
.clone(),
34
),
35
DataType::String => Self::Binview(
36
column
37
.str()
38
.unwrap()
39
.as_binary()
40
.rechunk()
41
.downcast_as_array()
42
.clone(),
43
),
44
dt if dt.is_primitive() && dt.to_physical().is_integer() => {
45
let c = column.to_physical_repr();
46
47
let [arr] = c
48
.as_materialized_series()
49
.rechunk()
50
.into_chunks()
51
.try_into()
52
.unwrap();
53
54
let (bytes, width): (Buffer<u8>, usize) = with_match_physical_integer_type!(dt, |$T| {
55
let arr: &PrimitiveArray<$T> = arr.as_any().downcast_ref().unwrap();
56
(arr.values().clone().try_transmute().unwrap(), std::mem::size_of::<$T>())
57
});
58
59
assert_eq!(width * arr.len(), bytes.len());
60
61
let arr = FixedSizeBinaryArray::new(
62
ArrowDataType::FixedSizeBinary(width),
63
bytes,
64
arr.validity().cloned(),
65
);
66
67
PreComputedKeys::Primitive(arr)
68
},
69
_ => return None,
70
})
71
}
72
73
#[inline]
74
pub fn get_key(&self, idx: usize) -> PartitionKey {
75
match self {
76
Self::Binview(arr) => PartitionKey::from_opt_slice(arr.get(idx)),
77
Self::Primitive(arr) => PartitionKey::from_opt_slice(arr.get(idx)),
78
Self::RowEncoded(arr) => PartitionKey::from_slice(unsafe { arr.value_unchecked(idx) }),
79
}
80
}
81
}
82
83