Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/array/get.rs
6939 views
1
use arrow::array::Array;
2
use polars_compute::gather::sublist::fixed_size_list::{
3
sub_fixed_size_list_get, sub_fixed_size_list_get_literal,
4
};
5
use polars_core::utils::align_chunks_binary;
6
7
use super::*;
8
9
fn array_get_literal(ca: &ArrayChunked, idx: i64, null_on_oob: bool) -> PolarsResult<Series> {
10
let chunks = ca
11
.downcast_iter()
12
.map(|arr| sub_fixed_size_list_get_literal(arr, idx, null_on_oob))
13
.collect::<PolarsResult<Vec<_>>>()?;
14
Series::try_from((ca.name().clone(), chunks))
15
.unwrap()
16
.cast(ca.inner_dtype())
17
}
18
19
/// Get the value by literal index in the array.
20
/// So index `0` would return the first item of every sub-array
21
/// and index `-1` would return the last item of every sub-array
22
/// if an index is out of bounds, it will return a `None`.
23
pub fn array_get(
24
ca: &ArrayChunked,
25
index: &Int64Chunked,
26
null_on_oob: bool,
27
) -> PolarsResult<Series> {
28
polars_ensure!(ca.width() < IdxSize::MAX as usize, ComputeError: "`arr.get` not supported for such wide arrays");
29
30
// Base case. No overflow.
31
if ca.width() * ca.len() < IdxSize::MAX as usize {
32
return array_get_impl(ca, index, null_on_oob);
33
}
34
35
// If the array width * length would overflow. Do it part-by-part.
36
assert!(ca.len() != 1 || index.len() != 1);
37
let rows_per_slice = IdxSize::MAX as usize / ca.width();
38
39
let mut ca = ca.clone();
40
let mut index = index.clone();
41
let current_ca;
42
let current_index;
43
if ca.len() == 1 {
44
current_ca = ca.clone();
45
} else {
46
(current_ca, ca) = ca.split_at(rows_per_slice as i64);
47
}
48
if index.len() == 1 {
49
current_index = index.clone();
50
} else {
51
(current_index, index) = index.split_at(rows_per_slice as i64);
52
}
53
let mut s = array_get_impl(&current_ca, &current_index, null_on_oob)?;
54
55
while !ca.is_empty() && !index.is_empty() {
56
let current_ca;
57
let current_index;
58
if ca.len() == 1 {
59
current_ca = ca.clone();
60
} else {
61
(current_ca, ca) = ca.split_at(rows_per_slice as i64);
62
}
63
if index.len() == 1 {
64
current_index = index.clone();
65
} else {
66
(current_index, index) = index.split_at(rows_per_slice as i64);
67
}
68
s.append_owned(array_get_impl(&current_ca, &current_index, null_on_oob)?)?;
69
}
70
71
Ok(s)
72
}
73
74
fn array_get_impl(
75
ca: &ArrayChunked,
76
index: &Int64Chunked,
77
null_on_oob: bool,
78
) -> PolarsResult<Series> {
79
match index.len() {
80
1 => {
81
let index = index.get(0);
82
if let Some(index) = index {
83
array_get_literal(ca, index, null_on_oob)
84
} else {
85
Ok(Series::full_null(
86
ca.name().clone(),
87
ca.len(),
88
ca.inner_dtype(),
89
))
90
}
91
},
92
len if len == ca.len() => {
93
let out = binary_to_series_arr_get(ca, index, null_on_oob, |arr, idx, nob| {
94
sub_fixed_size_list_get(arr, idx, nob)
95
});
96
out?.cast(ca.inner_dtype())
97
},
98
len => polars_bail!(
99
ComputeError:
100
"`arr.get` expression got an index array of length {} while the array has {} elements",
101
len, ca.len()
102
),
103
}
104
}
105
106
pub fn binary_to_series_arr_get<T, U, F>(
107
lhs: &ChunkedArray<T>,
108
rhs: &ChunkedArray<U>,
109
null_on_oob: bool,
110
mut op: F,
111
) -> PolarsResult<Series>
112
where
113
T: PolarsDataType,
114
U: PolarsDataType,
115
F: FnMut(&T::Array, &U::Array, bool) -> PolarsResult<Box<dyn Array>>,
116
{
117
let (lhs, rhs) = align_chunks_binary(lhs, rhs);
118
let chunks = lhs
119
.downcast_iter()
120
.zip(rhs.downcast_iter())
121
.map(|(lhs_arr, rhs_arr)| op(lhs_arr, rhs_arr, null_on_oob))
122
.collect::<PolarsResult<Vec<_>>>()?;
123
Series::try_from((lhs.name().clone(), chunks))
124
}
125
126