Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/implementations/null.rs
8420 views
1
use std::any::Any;
2
3
use polars_error::constants::LENGTH_LIMIT_MSG;
4
5
use self::compare_inner::TotalOrdInner;
6
use super::*;
7
use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
8
use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
9
use crate::prelude::*;
10
use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
11
use crate::series::*;
12
13
impl Series {
14
pub fn new_null(name: PlSmallStr, len: usize) -> Series {
15
NullChunked::new(name, len).into_series()
16
}
17
}
18
19
/// Physical backing of a `Series` with dtype `Null`.
///
/// No values are stored — only a name, a length, and placeholder chunks of
/// `arrow::array::NullArray` so generic chunk-based consumers keep working.
#[derive(Clone)]
pub struct NullChunked {
    pub(crate) name: PlSmallStr,
    // Total number of (null) values; kept in sync with `chunks` by
    // `compute_len`/`append`/`slice` etc.
    length: IdxSize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}
27
28
impl NullChunked {
29
pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
30
Self {
31
name,
32
length: len as IdxSize,
33
chunks: vec![Box::new(arrow::array::NullArray::new(
34
ArrowDataType::Null,
35
len,
36
))],
37
}
38
}
39
40
pub fn len(&self) -> usize {
41
self.length as usize
42
}
43
44
pub fn is_empty(&self) -> bool {
45
self.length == 0
46
}
47
}
48
impl PrivateSeriesNumeric for NullChunked {
49
fn bit_repr(&self) -> Option<BitRepr> {
50
Some(BitRepr::U32(UInt32Chunked::full_null(
51
self.name.clone(),
52
self.len(),
53
)))
54
}
55
}
56
57
impl PrivateSeries for NullChunked {
    /// Recompute `self.length` from the chunk lengths.
    fn compute_len(&mut self) {
        // Sum of all chunk lengths; single-chunk is the common case.
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        self.length = IdxSize::try_from(inner(&self.chunks)).expect(LENGTH_LIMIT_MSG);
    }
    fn _field(&self) -> Cow<'_, Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    // Statistics flags carry no information for an all-null series; the
    // setter is a deliberate no-op (and `_get_flags` below returns empty).
    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
        // The result is all-null no matter what the mask selects, so only the
        // broadcast output length needs to be computed/validated here.
        let len = match (self.len(), mask.len(), other.len()) {
            // all equal
            (a, b, c) if a == b && b == c => a,
            // one input broadcasts (length 1), the other two agree
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            // two inputs broadcast
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            // empty mask -> empty result
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

    // Arithmetic with a null operand yields nulls; all five ops only need to
    // resolve the broadcast output length (see `null_arithmetic` below).
    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        // All values are identical (null), so grouping produces at most a
        // single group spanning the whole series.
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::new_slice(vec![[0, self.length]], false, true)
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // This column contributes no ordering information: pair each row index
        // with the unit value `NonNull(())` and let the `by` columns decide.
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}
165
166
fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
167
let output_len = match (lhs.len(), rhs.len()) {
168
(1, len_r) => len_r,
169
(len_l, 1) => len_l,
170
(len_l, len_r) if len_l == len_r => len_l,
171
_ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
172
};
173
Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
174
}
175
176
impl SeriesTrait for NullChunked {
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    // SAFETY contract is inherited from the trait: the caller must keep
    // `length` consistent with the chunks they install (e.g. via
    // `compute_len`) — TODO(review): confirm against the trait docs.
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter<'_> {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    // For all `take*` variants the gathered values are necessarily null, so
    // only the number of indices matters; index validation is unnecessary.
    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn deposit(&self, validity: &Bitmap) -> Series {
        // A null series can only accept an all-invalid validity mask
        // (no set bits); depositing valid slots would require real values.
        assert_eq!(validity.set_bits(), 0);
        self.clone().into_series()
    }

    fn len(&self) -> usize {
        self.length as usize
    }

    // Every element is null, so any non-empty null series "has nulls".
    fn has_nulls(&self) -> bool {
        !self.is_empty()
    }

    // Rebuilding via `new` yields a single-chunk series of the same length.
    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    // Dropping nulls from an all-null series always leaves it empty.
    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

    // Casting null to any dtype produces a full-null series of that dtype.
    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        // 0 or 1 distinct values (see `n_unique`).
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        // All values are null, so there is exactly one distinct value
        // unless the series is empty.
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        // Either `[]` (empty) or `[0]` (first occurrence of the single
        // distinct value).
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
        // One distinct value -> every row maps to id 0.
        if self.is_empty() {
            Ok((0, Vec::new()))
        } else {
            Ok((1, vec![0; self.len()]))
        }
    }

    // The value at any index is null, so repeating it is just a resize.
    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_> {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        // Delegate offset normalization/clamping to the shared chunk helper.
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len as IdxSize,
            chunks,
        }
        .into_series()
    }

    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                // Recompute lengths from the split chunks rather than from
                // `offset`, which may have been normalized/clamped.
                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

    // All values compare equal, so sorting is the identity.
    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    // Identity permutation: 0..len.
    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    // Reversal of an all-null series is a no-op.
    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        // Only the count of `true` entries matters; the surviving values are
        // all null anyway.
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            // Broadcast a scalar mask: keep everything or nothing. A null
            // mask value filters everything out, like `Some(false)`.
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    // Shifting fills vacated slots with null — which is what every slot
    // already holds — so the series is unchanged.
    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

    // All reductions over nulls are null.
    fn sum_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn min_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn max_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn mean_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn median_reduce(&self) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
        Ok(Scalar::null(DataType::Null))
    }

    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len() as IdxSize;
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        // Downcast cannot fail: dtype was just checked to be Null.
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len() as IdxSize;
        // Steal the chunks instead of cloning; `other` is owned.
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        // Rebuild as a single chunk of the combined length.
        // NOTE(review): unlike `append`, `other`'s dtype is not validated
        // here; presumably callers guarantee it — confirm.
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    #[cfg(feature = "approx_unique")]
    fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        // Exact answer is trivial here: 0 or 1 distinct values.
        Ok(if self.is_empty() { 0 } else { 1 })
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    // Null has no separate physical representation; physical == logical.
    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}
422
423
// SAFETY: the trait is `unsafe` to implement; this impl simply wraps the
// chunked array in the `Series` newtype without violating any invariant
// visible here — the stored chunks really are of the declared Null dtype.
unsafe impl IntoSeries for NullChunked {
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}
431
432