// Source: GitHub repository pola-rs/polars
// Path: crates/polars-core/src/series/series_trait.rs
1
use std::any::Any;
2
use std::borrow::Cow;
3
4
use arrow::bitmap::{Bitmap, BitmapBuilder};
5
use polars_compute::rolling::QuantileMethod;
6
#[cfg(feature = "serde")]
7
use serde::{Deserialize, Serialize};
8
9
use crate::chunked_array::cast::CastOptions;
10
#[cfg(feature = "object")]
11
use crate::chunked_array::object::PolarsObjectSafe;
12
use crate::prelude::*;
13
14
/// Sortedness marker.
///
/// [`IsSorted::reverse`] maps each marker to the one that applies once the
/// order of the values is flipped.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum IsSorted {
    /// Marked as sorted in ascending order.
    Ascending,
    /// Marked as sorted in descending order.
    Descending,
    /// Not marked as sorted.
    Not,
}

impl IsSorted {
    /// Return the marker for the reversed order.
    ///
    /// `Ascending` and `Descending` swap; `Not` stays `Not`.
    pub fn reverse(self) -> Self {
        // Explicit `Self::` paths instead of a glob import: with a glob, a
        // renamed or removed variant would silently turn its match arm into a
        // catch-all binding instead of a compile error.
        match self {
            Self::Ascending => Self::Descending,
            Self::Descending => Self::Ascending,
            Self::Not => Self::Not,
        }
    }
}
33
34
pub enum BitRepr {
35
U8(UInt8Chunked),
36
U16(UInt16Chunked),
37
U32(UInt32Chunked),
38
U64(UInt64Chunked),
39
#[cfg(feature = "dtype-i128")]
40
I128(Int128Chunked),
41
}
42
43
pub(crate) mod private {
44
use polars_utils::aliases::PlSeedableRandomStateQuality;
45
46
use super::*;
47
use crate::chunked_array::flags::StatisticsFlags;
48
use crate::chunked_array::ops::compare_inner::{TotalEqInner, TotalOrdInner};
49
50
pub trait PrivateSeriesNumeric {
51
/// Return a bit representation
52
///
53
/// If there is no available bit representation this returns `None`.
54
fn bit_repr(&self) -> Option<BitRepr>;
55
}
56
57
pub trait PrivateSeries {
58
#[cfg(feature = "object")]
59
fn get_list_builder(
60
&self,
61
_name: PlSmallStr,
62
_values_capacity: usize,
63
_list_capacity: usize,
64
) -> Box<dyn ListBuilderTrait> {
65
invalid_operation_panic!(get_list_builder, self)
66
}
67
68
/// Get field (used in schema)
69
fn _field(&self) -> Cow<'_, Field>;
70
71
fn _dtype(&self) -> &DataType;
72
73
fn compute_len(&mut self);
74
75
fn _get_flags(&self) -> StatisticsFlags;
76
77
fn _set_flags(&mut self, flags: StatisticsFlags);
78
79
unsafe fn equal_element(
80
&self,
81
_idx_self: usize,
82
_idx_other: usize,
83
_other: &Series,
84
) -> bool {
85
invalid_operation_panic!(equal_element, self)
86
}
87
#[expect(clippy::wrong_self_convention)]
88
fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a>;
89
#[expect(clippy::wrong_self_convention)]
90
fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a>;
91
92
fn vec_hash(
93
&self,
94
_build_hasher: PlSeedableRandomStateQuality,
95
_buf: &mut Vec<u64>,
96
) -> PolarsResult<()>;
97
fn vec_hash_combine(
98
&self,
99
_build_hasher: PlSeedableRandomStateQuality,
100
_hashes: &mut [u64],
101
) -> PolarsResult<()>;
102
103
/// # Safety
104
///
105
/// Does no bounds checks, groups must be correct.
106
#[cfg(feature = "algorithm_group_by")]
107
unsafe fn agg_min(&self, groups: &GroupsType) -> Series {
108
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
109
}
110
/// # Safety
111
///
112
/// Does no bounds checks, groups must be correct.
113
#[cfg(feature = "algorithm_group_by")]
114
unsafe fn agg_max(&self, groups: &GroupsType) -> Series {
115
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
116
}
117
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
118
/// first cast to `Int64` to prevent overflow issues.
119
#[cfg(feature = "algorithm_group_by")]
120
unsafe fn agg_sum(&self, groups: &GroupsType) -> Series {
121
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
122
}
123
/// # Safety
124
///
125
/// Does no bounds checks, groups must be correct.
126
#[cfg(feature = "algorithm_group_by")]
127
unsafe fn agg_std(&self, groups: &GroupsType, _ddof: u8) -> Series {
128
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
129
}
130
/// # Safety
131
///
132
/// Does no bounds checks, groups must be correct.
133
#[cfg(feature = "algorithm_group_by")]
134
unsafe fn agg_var(&self, groups: &GroupsType, _ddof: u8) -> Series {
135
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
136
}
137
/// # Safety
138
///
139
/// Does no bounds checks, groups must be correct.
140
#[cfg(feature = "algorithm_group_by")]
141
unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
142
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
143
}
144
145
/// # Safety
146
///
147
/// Does no bounds checks, groups must be correct.
148
#[cfg(feature = "bitwise")]
149
unsafe fn agg_and(&self, groups: &GroupsType) -> Series {
150
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
151
}
152
153
/// # Safety
154
///
155
/// Does no bounds checks, groups must be correct.
156
#[cfg(feature = "bitwise")]
157
unsafe fn agg_or(&self, groups: &GroupsType) -> Series {
158
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
159
}
160
161
/// # Safety
162
///
163
/// Does no bounds checks, groups must be correct.
164
#[cfg(feature = "bitwise")]
165
unsafe fn agg_xor(&self, groups: &GroupsType) -> Series {
166
Series::full_null(self._field().name().clone(), groups.len(), self._dtype())
167
}
168
169
fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
170
polars_bail!(opq = subtract, self._dtype());
171
}
172
fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
173
polars_bail!(opq = add, self._dtype());
174
}
175
fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
176
polars_bail!(opq = multiply, self._dtype());
177
}
178
fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
179
polars_bail!(opq = divide, self._dtype());
180
}
181
fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
182
polars_bail!(opq = remainder, self._dtype());
183
}
184
#[cfg(feature = "algorithm_group_by")]
185
fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
186
polars_bail!(opq = group_tuples, self._dtype());
187
}
188
#[cfg(feature = "zip_with")]
189
fn zip_with_same_type(
190
&self,
191
_mask: &BooleanChunked,
192
_other: &Series,
193
) -> PolarsResult<Series> {
194
polars_bail!(opq = zip_with_same_type, self._dtype());
195
}
196
197
#[allow(unused_variables)]
198
fn arg_sort_multiple(
199
&self,
200
by: &[Column],
201
_options: &SortMultipleOptions,
202
) -> PolarsResult<IdxCa> {
203
polars_bail!(opq = arg_sort_multiple, self._dtype());
204
}
205
}
206
}
207
208
pub trait SeriesTrait:
209
Send + Sync + private::PrivateSeries + private::PrivateSeriesNumeric
210
{
211
/// Rename the Series.
212
fn rename(&mut self, name: PlSmallStr);
213
214
/// Get the lengths of the underlying chunks
215
fn chunk_lengths(&self) -> ChunkLenIter<'_>;
216
217
/// Name of series.
218
fn name(&self) -> &PlSmallStr;
219
220
/// Get field (used in schema)
221
fn field(&self) -> Cow<'_, Field> {
222
self._field()
223
}
224
225
/// Get datatype of series.
226
fn dtype(&self) -> &DataType {
227
self._dtype()
228
}
229
230
/// Underlying chunks.
231
fn chunks(&self) -> &Vec<ArrayRef>;
232
233
/// Underlying chunks.
234
///
235
/// # Safety
236
/// The caller must ensure the length and the data types of `ArrayRef` does not change.
237
unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef>;
238
239
/// Number of chunks in this Series
240
fn n_chunks(&self) -> usize {
241
self.chunks().len()
242
}
243
244
/// Shrink the capacity of this array to fit its length.
245
fn shrink_to_fit(&mut self) {
246
// no-op
247
}
248
249
/// Take `num_elements` from the top as a zero copy view.
250
fn limit(&self, num_elements: usize) -> Series {
251
self.slice(0, num_elements)
252
}
253
254
/// Get a zero copy view of the data.
255
///
256
/// When offset is negative the offset is counted from the
257
/// end of the array
258
fn slice(&self, _offset: i64, _length: usize) -> Series;
259
260
/// Get a zero copy view of the data.
261
///
262
/// When offset is negative the offset is counted from the
263
/// end of the array
264
fn split_at(&self, _offset: i64) -> (Series, Series);
265
266
fn append(&mut self, other: &Series) -> PolarsResult<()>;
267
fn append_owned(&mut self, other: Series) -> PolarsResult<()>;
268
269
#[doc(hidden)]
270
fn extend(&mut self, _other: &Series) -> PolarsResult<()>;
271
272
/// Filter by boolean mask. This operation clones data.
273
fn filter(&self, _filter: &BooleanChunked) -> PolarsResult<Series>;
274
275
/// Take from `self` at the indexes given by `idx`.
276
///
277
/// Null values in `idx` because null values in the output array.
278
///
279
/// This operation is clone.
280
fn take(&self, _indices: &IdxCa) -> PolarsResult<Series>;
281
282
/// Take from `self` at the indexes given by `idx`.
283
///
284
/// Null values in `idx` because null values in the output array.
285
///
286
/// # Safety
287
/// This doesn't check any bounds.
288
unsafe fn take_unchecked(&self, _idx: &IdxCa) -> Series;
289
290
/// Take from `self` at the indexes given by `idx`.
291
///
292
/// This operation is clone.
293
fn take_slice(&self, _indices: &[IdxSize]) -> PolarsResult<Series>;
294
295
/// Take from `self` at the indexes given by `idx`.
296
///
297
/// # Safety
298
/// This doesn't check any bounds.
299
unsafe fn take_slice_unchecked(&self, _idx: &[IdxSize]) -> Series;
300
301
/// Get length of series.
302
fn len(&self) -> usize;
303
304
/// Check if Series is empty.
305
fn is_empty(&self) -> bool {
306
self.len() == 0
307
}
308
309
/// Aggregate all chunks to a contiguous array of memory.
310
fn rechunk(&self) -> Series;
311
312
fn rechunk_validity(&self) -> Option<Bitmap> {
313
if self.chunks().len() == 1 {
314
return self.chunks()[0].validity().cloned();
315
}
316
317
if !self.has_nulls() || self.is_empty() {
318
return None;
319
}
320
321
let mut bm = BitmapBuilder::with_capacity(self.len());
322
for arr in self.chunks() {
323
if let Some(v) = arr.validity() {
324
bm.extend_from_bitmap(v);
325
} else {
326
bm.extend_constant(arr.len(), true);
327
}
328
}
329
bm.into_opt_validity()
330
}
331
332
/// Drop all null values and return a new Series.
333
fn drop_nulls(&self) -> Series {
334
if self.null_count() == 0 {
335
Series(self.clone_inner())
336
} else {
337
self.filter(&self.is_not_null()).unwrap()
338
}
339
}
340
341
/// Returns the sum of the array as an f64.
342
fn _sum_as_f64(&self) -> f64 {
343
invalid_operation_panic!(_sum_as_f64, self)
344
}
345
346
/// Returns the mean value in the array
347
/// Returns an option because the array is nullable.
348
fn mean(&self) -> Option<f64> {
349
None
350
}
351
352
/// Returns the std value in the array
353
/// Returns an option because the array is nullable.
354
fn std(&self, _ddof: u8) -> Option<f64> {
355
None
356
}
357
358
/// Returns the var value in the array
359
/// Returns an option because the array is nullable.
360
fn var(&self, _ddof: u8) -> Option<f64> {
361
None
362
}
363
364
/// Returns the median value in the array
365
/// Returns an option because the array is nullable.
366
fn median(&self) -> Option<f64> {
367
None
368
}
369
370
/// Create a new Series filled with values from the given index.
371
///
372
/// # Example
373
///
374
/// ```rust
375
/// use polars_core::prelude::*;
376
/// let s = Series::new("a".into(), [0i32, 1, 8]);
377
/// let s2 = s.new_from_index(2, 4);
378
/// assert_eq!(Vec::from(s2.i32().unwrap()), &[Some(8), Some(8), Some(8), Some(8)])
379
/// ```
380
fn new_from_index(&self, _index: usize, _length: usize) -> Series;
381
382
/// Trim all lists of unused start and end elements recursively.
383
///
384
/// - `None` if nothing needed to be done.
385
/// - `Some(series)` if something changed.
386
fn trim_lists_to_normalized_offsets(&self) -> Option<Series> {
387
None
388
}
389
390
/// Propagate down nulls in nested types.
391
///
392
/// - `None` if nothing needed to be done.
393
/// - `Some(series)` if something changed.
394
fn propagate_nulls(&self) -> Option<Series> {
395
None
396
}
397
398
/// Find the indices of elements where the null masks are different recursively.
399
fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>);
400
401
fn cast(&self, _dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;
402
403
/// Get a single value by index. Don't use this operation for loops as a runtime cast is
404
/// needed for every iteration.
405
fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
406
polars_ensure!(index < self.len(), oob = index, self.len());
407
// SAFETY: Just did bounds check
408
let value = unsafe { self.get_unchecked(index) };
409
Ok(value)
410
}
411
412
/// Get a single value by index. Don't use this operation for loops as a runtime cast is
413
/// needed for every iteration.
414
///
415
/// This may refer to physical types
416
///
417
/// # Safety
418
/// Does not do any bounds checking
419
unsafe fn get_unchecked(&self, _index: usize) -> AnyValue<'_>;
420
421
fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
422
polars_bail!(opq = sort_with, self._dtype());
423
}
424
425
/// Retrieve the indexes needed for a sort.
426
#[allow(unused)]
427
fn arg_sort(&self, options: SortOptions) -> IdxCa {
428
invalid_operation_panic!(arg_sort, self)
429
}
430
431
/// Count the null values.
432
fn null_count(&self) -> usize;
433
434
/// Return if any the chunks in this [`ChunkedArray`] have nulls.
435
fn has_nulls(&self) -> bool;
436
437
/// Get unique values in the Series.
438
fn unique(&self) -> PolarsResult<Series> {
439
polars_bail!(opq = unique, self._dtype());
440
}
441
442
/// Get unique values in the Series.
443
///
444
/// A `null` value also counts as a unique value.
445
fn n_unique(&self) -> PolarsResult<usize> {
446
polars_bail!(opq = n_unique, self._dtype());
447
}
448
449
/// Get first indexes of unique values.
450
fn arg_unique(&self) -> PolarsResult<IdxCa> {
451
polars_bail!(opq = arg_unique, self._dtype());
452
}
453
454
/// Get a mask of the null values.
455
fn is_null(&self) -> BooleanChunked;
456
457
/// Get a mask of the non-null values.
458
fn is_not_null(&self) -> BooleanChunked;
459
460
/// return a Series in reversed order
461
fn reverse(&self) -> Series;
462
463
/// Rechunk and return a pointer to the start of the Series.
464
/// Only implemented for numeric types
465
fn as_single_ptr(&mut self) -> PolarsResult<usize> {
466
polars_bail!(opq = as_single_ptr, self._dtype());
467
}
468
469
/// Shift the values by a given period and fill the parts that will be empty due to this operation
470
/// with `Nones`.
471
///
472
/// *NOTE: If you want to fill the Nones with a value use the
473
/// [`shift` operation on `ChunkedArray<T>`](../chunked_array/ops/trait.ChunkShift.html).*
474
///
475
/// # Example
476
///
477
/// ```rust
478
/// # use polars_core::prelude::*;
479
/// fn example() -> PolarsResult<()> {
480
/// let s = Series::new("series".into(), &[1, 2, 3]);
481
///
482
/// let shifted = s.shift(1);
483
/// assert_eq!(Vec::from(shifted.i32()?), &[None, Some(1), Some(2)]);
484
///
485
/// let shifted = s.shift(-1);
486
/// assert_eq!(Vec::from(shifted.i32()?), &[Some(2), Some(3), None]);
487
///
488
/// let shifted = s.shift(2);
489
/// assert_eq!(Vec::from(shifted.i32()?), &[None, None, Some(1)]);
490
///
491
/// Ok(())
492
/// }
493
/// example();
494
/// ```
495
fn shift(&self, _periods: i64) -> Series;
496
497
/// Get the sum of the Series as a new Scalar.
498
///
499
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
500
/// first cast to `Int64` to prevent overflow issues.
501
fn sum_reduce(&self) -> PolarsResult<Scalar> {
502
polars_bail!(opq = sum, self._dtype());
503
}
504
/// Get the max of the Series as a new Series of length 1.
505
fn max_reduce(&self) -> PolarsResult<Scalar> {
506
polars_bail!(opq = max, self._dtype());
507
}
508
/// Get the min of the Series as a new Series of length 1.
509
fn min_reduce(&self) -> PolarsResult<Scalar> {
510
polars_bail!(opq = min, self._dtype());
511
}
512
/// Get the median of the Series as a new Series of length 1.
513
fn median_reduce(&self) -> PolarsResult<Scalar> {
514
polars_bail!(opq = median, self._dtype());
515
}
516
/// Get the variance of the Series as a new Series of length 1.
517
fn var_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
518
polars_bail!(opq = var, self._dtype());
519
}
520
/// Get the standard deviation of the Series as a new Series of length 1.
521
fn std_reduce(&self, _ddof: u8) -> PolarsResult<Scalar> {
522
polars_bail!(opq = std, self._dtype());
523
}
524
/// Get the quantile of the ChunkedArray as a new Series of length 1.
525
fn quantile_reduce(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Scalar> {
526
polars_bail!(opq = quantile, self._dtype());
527
}
528
/// Get the bitwise AND of the Series as a new Series of length 1,
529
fn and_reduce(&self) -> PolarsResult<Scalar> {
530
polars_bail!(opq = and_reduce, self._dtype());
531
}
532
/// Get the bitwise OR of the Series as a new Series of length 1,
533
fn or_reduce(&self) -> PolarsResult<Scalar> {
534
polars_bail!(opq = or_reduce, self._dtype());
535
}
536
/// Get the bitwise XOR of the Series as a new Series of length 1,
537
fn xor_reduce(&self) -> PolarsResult<Scalar> {
538
polars_bail!(opq = xor_reduce, self._dtype());
539
}
540
541
/// Get the first element of the [`Series`] as a [`Scalar`]
542
///
543
/// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
544
fn first(&self) -> Scalar {
545
let dt = self.dtype();
546
let av = self.get(0).map_or(AnyValue::Null, AnyValue::into_static);
547
548
Scalar::new(dt.clone(), av)
549
}
550
551
/// Get the last element of the [`Series`] as a [`Scalar`]
552
///
553
/// If the [`Series`] is empty, a [`Scalar`] with a [`AnyValue::Null`] is returned.
554
fn last(&self) -> Scalar {
555
let dt = self.dtype();
556
let av = if self.len() == 0 {
557
AnyValue::Null
558
} else {
559
// SAFETY: len-1 < len if len != 0
560
unsafe { self.get_unchecked(self.len() - 1) }.into_static()
561
};
562
563
Scalar::new(dt.clone(), av)
564
}
565
566
#[cfg(feature = "approx_unique")]
567
fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
568
polars_bail!(opq = approx_n_unique, self._dtype());
569
}
570
571
/// Clone inner ChunkedArray and wrap in a new Arc
572
fn clone_inner(&self) -> Arc<dyn SeriesTrait>;
573
574
#[cfg(feature = "object")]
575
/// Get the value at this index as a downcastable Any trait ref.
576
fn get_object(&self, _index: usize) -> Option<&dyn PolarsObjectSafe> {
577
invalid_operation_panic!(get_object, self)
578
}
579
580
#[cfg(feature = "object")]
581
/// Get the value at this index as a downcastable Any trait ref.
582
///
583
/// # Safety
584
/// This function doesn't do any bound checks.
585
unsafe fn get_object_chunked_unchecked(
586
&self,
587
_chunk: usize,
588
_index: usize,
589
) -> Option<&dyn PolarsObjectSafe> {
590
invalid_operation_panic!(get_object_chunked_unchecked, self)
591
}
592
593
/// Get a hold of the [`ChunkedArray`], [`Logical`] or `NullChunked` as an `Any` trait
594
/// reference.
595
fn as_any(&self) -> &dyn Any;
596
597
/// Get a hold of the [`ChunkedArray`], [`Logical`] or `NullChunked` as an `Any` trait mutable
598
/// reference.
599
fn as_any_mut(&mut self) -> &mut dyn Any;
600
601
/// Get a hold of the [`ChunkedArray`] or `NullChunked` as an `Any` trait reference. This
602
/// pierces through `Logical` types to get the underlying physical array.
603
fn as_phys_any(&self) -> &dyn Any;
604
605
fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
606
607
#[cfg(feature = "checked_arithmetic")]
608
fn checked_div(&self, _rhs: &Series) -> PolarsResult<Series> {
609
polars_bail!(opq = checked_div, self._dtype());
610
}
611
612
#[cfg(feature = "rolling_window")]
613
/// Apply a custom function over a rolling/ moving window of the array.
614
/// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
615
fn rolling_map(
616
&self,
617
_f: &dyn Fn(&Series) -> PolarsResult<Series>,
618
_options: RollingOptionsFixedWindow,
619
) -> PolarsResult<Series> {
620
polars_bail!(opq = rolling_map, self._dtype());
621
}
622
}
623
624
impl dyn SeriesTrait + '_ {
625
pub fn unpack<T: PolarsPhysicalType>(&self) -> PolarsResult<&ChunkedArray<T>> {
626
polars_ensure!(&T::get_static_dtype() == self.dtype(), unpack);
627
Ok(self.as_ref())
628
}
629
}
630
631