Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/implementations/null.rs
6940 views
1
use std::any::Any;
2
3
use polars_error::constants::LENGTH_LIMIT_MSG;
4
5
use self::compare_inner::TotalOrdInner;
6
use super::*;
7
use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
8
use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
9
use crate::prelude::*;
10
use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
11
use crate::series::*;
12
13
impl Series {
    /// Construct a `Series` of dtype `Null` with the given name and `len` null elements.
    pub fn new_null(name: PlSmallStr, len: usize) -> Series {
        NullChunked::new(name, len).into_series()
    }
}
18
19
/// Physical backing of an all-null `Series` (dtype `Null`).
#[derive(Clone)]
pub struct NullChunked {
    pub(crate) name: PlSmallStr,
    // Total element count across all chunks; kept in sync by
    // `compute_len`/`append`/`append_owned`.
    length: IdxSize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}
27
28
impl NullChunked {
29
pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
30
Self {
31
name,
32
length: len as IdxSize,
33
chunks: vec![Box::new(arrow::array::NullArray::new(
34
ArrowDataType::Null,
35
len,
36
))],
37
}
38
}
39
40
pub fn len(&self) -> usize {
41
self.length as usize
42
}
43
44
pub fn is_empty(&self) -> bool {
45
self.length == 0
46
}
47
}
48
impl PrivateSeriesNumeric for NullChunked {
49
fn bit_repr(&self) -> Option<BitRepr> {
50
Some(BitRepr::U32(UInt32Chunked::full_null(
51
self.name.clone(),
52
self.len(),
53
)))
54
}
55
}
56
57
impl PrivateSeries for NullChunked {
    // Recompute `length` from the chunks, e.g. after callers mutated them
    // via `chunks_mut`.
    fn compute_len(&mut self) {
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        self.length = IdxSize::try_from(inner(&self.chunks)).expect(LENGTH_LIMIT_MSG);
    }
    fn _field(&self) -> Cow<'_, Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    // Nothing to record: statistics flags are meaningless for an all-null column.
    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
        // Whatever the mask selects, the result is all null; only the
        // broadcast output length must be computed and validated.
        let len = match (self.len(), mask.len(), other.len()) {
            // all three lengths equal
            (a, b, c) if a == b && b == c => a,
            // exactly one input is a unit-length broadcast
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            // two inputs are unit-length broadcasts
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            // an empty mask selects nothing
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

    // All arithmetic on nulls yields nulls; `null_arithmetic` only decides
    // the broadcast output length (`op` is used in the error message).
    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        // Every null compares equal, so all rows collapse into one slice
        // group spanning the full series.
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::Slice {
                groups: vec![[0, self.length]],
                rolling: false,
            }
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // All null values tie, so the ordering is decided entirely by `by`;
        // `NonNull(())` is a zero-sized placeholder sort key.
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}
168
169
fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
170
let output_len = match (lhs.len(), rhs.len()) {
171
(1, len_r) => len_r,
172
(len_l, 1) => len_l,
173
(len_l, len_r) if len_l == len_r => len_l,
174
_ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
175
};
176
Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
177
}
178
179
impl SeriesTrait for NullChunked {
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    // Callers mutating the chunks must keep `length` in sync
    // (e.g. via `compute_len`).
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    // Gathering from an all-null series yields all nulls: only the number of
    // indices matters, the index values themselves are never inspected.
    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    // No bounds checks needed — indices are never dereferenced.
    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn len(&self) -> usize {
        self.length as usize
    }

    // Every element is null, so any non-empty series "has nulls".
    fn has_nulls(&self) -> bool {
        !self.is_empty()
    }

    // Collapse to a single fresh chunk of the same total length.
    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    // Dropping nulls from an all-null series leaves an empty one.
    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

    // Null casts to any dtype as a full-null series of that dtype.
    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        // Either 0 or 1 distinct values — see `n_unique`.
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        // Null is the only possible value.
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        // First occurrence of the single distinct value is index 0
        // (empty series -> empty index list).
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    // Repeating any (null) element `length` times is just a null series.
    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        // `chunkops::slice` clamps and returns the effective length.
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len as IdxSize,
            chunks,
        }
        .into_series()
    }

    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

    // All values tie, so any order is already sorted.
    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    // Identity permutation, for the same reason.
    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    // Reversing nulls is a no-op.
    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            // Unit-length mask broadcasts over the whole series.
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            // Count of `true` mask values == number of surviving rows.
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    // Shifting nulls in/out of an all-null series changes nothing.
    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len() as IdxSize;
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        // The dtype check above guarantees the downcast succeeds.
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len() as IdxSize;
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        // NOTE(review): unlike `append`, this does not validate `other`'s
        // dtype — presumably callers guarantee it is Null; verify at call
        // sites. Replaces self with a single fresh chunk of the summed length.
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}
379
380
// NOTE(review): `IntoSeries` is an unsafe trait whose contract is declared
// elsewhere — presumably that the implementor is a valid `SeriesTrait`
// backing; confirm against the trait definition.
unsafe impl IntoSeries for NullChunked {
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}
388
389