// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-ops/src/chunked_array/array/namespace.rs
1
use arrow::array::builder::{ShareStrategy, make_builder};
2
use arrow::array::{Array, FixedSizeListArray};
3
use arrow::bitmap::BitmapBuilder;
4
use polars_core::prelude::arity::unary_kernel;
5
use polars_core::utils::slice_offsets;
6
7
use super::min_max::AggType;
8
use super::*;
9
#[cfg(feature = "array_count")]
10
use crate::chunked_array::array::count::array_count_matches;
11
use crate::chunked_array::array::count::count_boolean_bits;
12
use crate::chunked_array::array::sum_mean::sum_with_nulls;
13
#[cfg(feature = "array_any_all")]
14
use crate::prelude::array::any_all::{array_all, array_any};
15
use crate::prelude::array::get::array_get;
16
use crate::prelude::array::join::array_join;
17
use crate::prelude::array::sum_mean::sum_array_numerical;
18
use crate::series::ArgAgg;
19
20
pub fn has_inner_nulls(ca: &ArrayChunked) -> bool {
21
for arr in ca.downcast_iter() {
22
if arr.values().null_count() > 0 {
23
return true;
24
}
25
}
26
false
27
}
28
29
fn get_agg(ca: &ArrayChunked, agg_type: AggType) -> Series {
30
let values = ca.get_inner();
31
let width = ca.width();
32
min_max::array_dispatch(ca.name().clone(), &values, width, agg_type)
33
}
34
35
pub trait ArrayNameSpace: AsArray {
36
fn array_max(&self) -> Series {
37
let ca = self.as_array();
38
get_agg(ca, AggType::Max)
39
}
40
41
fn array_min(&self) -> Series {
42
let ca = self.as_array();
43
get_agg(ca, AggType::Min)
44
}
45
46
fn array_sum(&self) -> PolarsResult<Series> {
47
let ca = self.as_array();
48
49
if has_inner_nulls(ca) {
50
return sum_with_nulls(ca, ca.inner_dtype());
51
};
52
53
match ca.inner_dtype() {
54
DataType::Boolean => Ok(count_boolean_bits(ca).into_series()),
55
dt if dt.is_primitive_numeric() => Ok(sum_array_numerical(ca, dt)),
56
dt => sum_with_nulls(ca, dt),
57
}
58
}
59
60
fn array_mean(&self) -> PolarsResult<Series> {
61
let ca = self.as_array();
62
dispersion::mean_with_nulls(ca)
63
}
64
65
fn array_median(&self) -> PolarsResult<Series> {
66
let ca = self.as_array();
67
dispersion::median_with_nulls(ca)
68
}
69
70
fn array_std(&self, ddof: u8) -> PolarsResult<Series> {
71
let ca = self.as_array();
72
dispersion::std_with_nulls(ca, ddof)
73
}
74
75
fn array_var(&self, ddof: u8) -> PolarsResult<Series> {
76
let ca = self.as_array();
77
dispersion::var_with_nulls(ca, ddof)
78
}
79
80
fn array_unique(&self) -> PolarsResult<ListChunked> {
81
let ca = self.as_array();
82
ca.try_apply_amortized_to_list(|s| s.as_ref().unique())
83
}
84
85
fn array_unique_stable(&self) -> PolarsResult<ListChunked> {
86
let ca = self.as_array();
87
ca.try_apply_amortized_to_list(|s| s.as_ref().unique_stable())
88
}
89
90
fn array_n_unique(&self) -> PolarsResult<IdxCa> {
91
let ca = self.as_array();
92
ca.try_apply_amortized_generic(|opt_s| {
93
let opt_v = opt_s.map(|s| s.as_ref().n_unique()).transpose()?;
94
Ok(opt_v.map(|idx| idx as IdxSize))
95
})
96
}
97
98
#[cfg(feature = "array_any_all")]
99
fn array_any(&self) -> PolarsResult<Series> {
100
let ca = self.as_array();
101
array_any(ca)
102
}
103
104
#[cfg(feature = "array_any_all")]
105
fn array_all(&self) -> PolarsResult<Series> {
106
let ca = self.as_array();
107
array_all(ca)
108
}
109
110
fn array_sort(&self, options: SortOptions) -> PolarsResult<ArrayChunked> {
111
let ca = self.as_array();
112
// SAFETY: Sort only changes the order of the elements in each subarray.
113
unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().sort_with(options)) }
114
}
115
116
fn array_reverse(&self) -> ArrayChunked {
117
let ca = self.as_array();
118
// SAFETY: Reverse only changes the order of the elements in each subarray
119
unsafe { ca.apply_amortized_same_type(|s| s.as_ref().reverse()) }
120
}
121
122
fn array_arg_min(&self) -> IdxCa {
123
let ca = self.as_array();
124
ca.apply_amortized_generic(|opt_s| {
125
opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize))
126
})
127
}
128
129
fn array_arg_max(&self) -> IdxCa {
130
let ca = self.as_array();
131
ca.apply_amortized_generic(|opt_s| {
132
opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize))
133
})
134
}
135
136
fn array_get(&self, index: &Int64Chunked, null_on_oob: bool) -> PolarsResult<Series> {
137
let ca = self.as_array();
138
array_get(ca, index, null_on_oob)
139
}
140
141
fn array_join(&self, separator: &StringChunked, ignore_nulls: bool) -> PolarsResult<Series> {
142
let ca = self.as_array();
143
array_join(ca, separator, ignore_nulls).map(|ok| ok.into_series())
144
}
145
146
#[cfg(feature = "array_count")]
147
fn array_count_matches(&self, element: AnyValue) -> PolarsResult<Series> {
148
let ca = self.as_array();
149
array_count_matches(ca, element)
150
}
151
152
fn array_shift(&self, n: &Series) -> PolarsResult<Series> {
153
let ca = self.as_array();
154
let n_s = n.cast(&DataType::Int64)?;
155
let n = n_s.i64()?;
156
let out = match (ca.len(), n.len()) {
157
(a, b) if a == b => {
158
// SAFETY: Shift does not change the dtype and number of elements of sub-array.
159
unsafe {
160
ca.zip_and_apply_amortized_same_type(n, |opt_s, opt_periods| {
161
match (opt_s, opt_periods) {
162
(Some(s), Some(n)) => Some(s.as_ref().shift(n)),
163
_ => None,
164
}
165
})
166
}
167
},
168
(_, 1) => {
169
if let Some(n) = n.get(0) {
170
// SAFETY: Shift does not change the dtype and number of elements of sub-array.
171
unsafe { ca.apply_amortized_same_type(|s| s.as_ref().shift(n)) }
172
} else {
173
ArrayChunked::full_null_with_dtype(
174
ca.name().clone(),
175
ca.len(),
176
ca.inner_dtype(),
177
ca.width(),
178
)
179
}
180
},
181
(1, _) => {
182
if ca.get(0).is_some() {
183
// Optimize: This does not need to broadcast first.
184
let ca = ca.new_from_index(0, n.len());
185
// SAFETY: Shift does not change the dtype and number of elements of sub-array.
186
unsafe {
187
ca.zip_and_apply_amortized_same_type(n, |opt_s, opt_periods| {
188
match (opt_s, opt_periods) {
189
(Some(s), Some(n)) => Some(s.as_ref().shift(n)),
190
_ => None,
191
}
192
})
193
}
194
} else {
195
ArrayChunked::full_null_with_dtype(
196
ca.name().clone(),
197
ca.len(),
198
ca.inner_dtype(),
199
ca.width(),
200
)
201
}
202
},
203
_ => polars_bail!(length_mismatch = "arr.shift", ca.len(), n.len()),
204
};
205
Ok(out.into_series())
206
}
207
208
fn array_slice(&self, offset: i64, length: i64) -> PolarsResult<Series> {
209
let slice_arr: ArrayChunked = unary_kernel(
210
self.as_array(),
211
move |arr: &FixedSizeListArray| -> FixedSizeListArray {
212
let length: usize = if length < 0 {
213
(arr.size() as i64 + length).max(0)
214
} else {
215
length
216
}
217
.try_into()
218
.expect("Length can not be larger than i64::MAX");
219
let (raw_offset, slice_len) = slice_offsets(offset, length, arr.size());
220
221
let mut builder = make_builder(arr.values().dtype());
222
builder.reserve(slice_len * arr.len());
223
224
let mut validity = BitmapBuilder::with_capacity(arr.len());
225
226
let values = arr.values().as_ref();
227
for row in 0..arr.len() {
228
if !arr.is_valid(row) {
229
validity.push(false);
230
continue;
231
}
232
let inner_offset = row * arr.size() + raw_offset;
233
builder.subslice_extend(values, inner_offset, slice_len, ShareStrategy::Always);
234
validity.push(true);
235
}
236
let values = builder.freeze_reset();
237
let sliced_dtype = match arr.dtype() {
238
ArrowDataType::FixedSizeList(inner, _) => {
239
ArrowDataType::FixedSizeList(inner.clone(), slice_len)
240
},
241
_ => unreachable!(),
242
};
243
FixedSizeListArray::new(
244
sliced_dtype,
245
arr.len(),
246
values,
247
validity.into_opt_validity(),
248
)
249
},
250
);
251
Ok(slice_arr.into_series())
252
}
253
}
254
255
// Empty impl: `ArrayChunked` uses the default bodies of all `ArrayNameSpace` methods.
impl ArrayNameSpace for ArrayChunked {}
256
257