Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/mod.rs
6940 views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
//! Type agnostic columnar data structure.
3
use crate::chunked_array::flags::StatisticsFlags;
4
pub use crate::prelude::ChunkCompareEq;
5
use crate::prelude::*;
6
use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
7
8
/// Panics with a message naming the unsupported operation and the dtype
/// reported by `_dtype()` on the given expression.
macro_rules! invalid_operation_panic {
    ($operation:ident, $series:expr) => {
        panic!(
            "`{}` operation not supported for dtype `{}`",
            stringify!($operation),
            $series._dtype()
        )
    };
}
17
18
pub mod amortized_iter;
19
mod any_value;
20
pub mod arithmetic;
21
pub mod builder;
22
mod comparison;
23
mod from;
24
pub mod implementations;
25
mod into;
26
pub(crate) mod iterator;
27
pub mod ops;
28
mod series_trait;
29
30
use std::borrow::Cow;
31
use std::hash::{Hash, Hasher};
32
use std::ops::Deref;
33
34
use arrow::compute::aggregate::estimated_bytes_size;
35
use arrow::offset::Offsets;
36
pub use from::*;
37
pub use iterator::{SeriesIter, SeriesPhysIter};
38
use num_traits::NumCast;
39
use polars_error::feature_gated;
40
pub use series_trait::{IsSorted, *};
41
42
use crate::POOL;
43
use crate::chunked_array::cast::CastOptions;
44
#[cfg(feature = "zip_with")]
45
use crate::series::arithmetic::coerce_lhs_rhs;
46
use crate::utils::{Wrap, handle_casting_failures, materialize_dyn_int};
47
48
/// # Series
49
/// The columnar data type for a DataFrame.
50
///
51
/// Most of the available functions are defined in the [SeriesTrait trait](crate::series::SeriesTrait).
52
///
53
/// The `Series` struct consists
54
/// of typed [ChunkedArray]'s. To quickly cast
55
/// a `Series` to a `ChunkedArray` you can call the method with the name of the type:
56
///
57
/// ```
58
/// # use polars_core::prelude::*;
59
/// let s: Series = [1, 2, 3].iter().collect();
60
/// // Quickly obtain the ChunkedArray wrapped by the Series.
61
/// let chunked_array = s.i32().unwrap();
62
/// ```
63
///
64
/// ## Arithmetic
65
///
66
/// You can do standard arithmetic on series.
67
/// ```
68
/// # use polars_core::prelude::*;
69
/// let s = Series::new("a".into(), [1 , 2, 3]);
70
/// let out_add = &s + &s;
71
/// let out_sub = &s - &s;
72
/// let out_div = &s / &s;
73
/// let out_mul = &s * &s;
74
/// ```
75
///
76
/// Or with series and numbers.
77
///
78
/// ```
79
/// # use polars_core::prelude::*;
80
/// let s: Series = (1..3).collect();
81
/// let out_add_one = &s + 1;
82
/// let out_multiply = &s * 10;
83
///
84
/// // Could not overload left hand side operator.
85
/// let out_divide = 1.div(&s);
86
/// let out_add = 1.add(&s);
87
/// let out_subtract = 1.sub(&s);
88
/// let out_multiply = 1.mul(&s);
89
/// ```
90
///
91
/// ## Comparison
92
/// You can obtain boolean mask by comparing series.
93
///
94
/// ```
95
/// # use polars_core::prelude::*;
96
/// let s = Series::new("dollars".into(), &[1, 2, 3]);
97
/// let mask = s.equal(1).unwrap();
98
/// let valid = [true, false, false].iter();
99
/// assert!(mask
100
/// .into_iter()
101
/// .map(|opt_bool| opt_bool.unwrap()) // option, because series can be null
102
/// .zip(valid)
103
/// .all(|(a, b)| a == *b))
104
/// ```
105
///
106
/// See all the comparison operators in the [ChunkCompareEq trait](crate::chunked_array::ops::ChunkCompareEq) and
107
/// [ChunkCompareIneq trait](crate::chunked_array::ops::ChunkCompareIneq).
108
///
109
/// ## Iterators
110
/// The Series variants contain differently typed [ChunkedArray]s.
111
/// These structs can be turned into iterators, making it possible to use any function/ closure you want
112
/// on a Series.
113
///
114
/// These iterators return an `Option<T>` because the values of a series may be null.
115
///
116
/// ```
117
/// use polars_core::prelude::*;
118
/// let pi = 3.14;
119
/// let s = Series::new("angle".into(), [2f32 * pi, pi, 1.5 * pi].as_ref());
120
/// let s_cos: Series = s.f32()
121
/// .expect("series was not an f32 dtype")
122
/// .into_iter()
123
/// .map(|opt_angle| opt_angle.map(|angle| angle.cos()))
124
/// .collect();
125
/// ```
126
///
127
/// ## Creation
128
/// Series can be created from different data structures. Below we'll show a few ways we can create
129
/// a Series object.
130
///
131
/// ```
132
/// # use polars_core::prelude::*;
133
/// // Series can be created from Vec's, slices and arrays
134
/// Series::new("boolean series".into(), &[true, false, true]);
135
/// Series::new("int series".into(), &[1, 2, 3]);
136
/// // And can be nullable
137
/// Series::new("got nulls".into(), &[Some(1), None, Some(2)]);
138
///
139
/// // Series can also be collected from iterators
140
/// let from_iter: Series = (0..10)
141
/// .into_iter()
142
/// .collect();
143
///
144
/// ```
145
#[derive(Clone)]
146
#[must_use]
147
pub struct Series(pub Arc<dyn SeriesTrait>);
148
149
/// Equality of wrapped series is delegated to `Series::equals_missing`.
impl PartialEq for Wrap<Series> {
    fn eq(&self, rhs: &Self) -> bool {
        let lhs: &Series = &self.0;
        lhs.equals_missing(rhs)
    }
}
154
155
// Marker impl; the comparison itself comes from the `PartialEq` impl for `Wrap<Series>`.
impl Eq for Wrap<Series> {}
156
157
impl Hash for Wrap<Series> {
158
fn hash<H: Hasher>(&self, state: &mut H) {
159
let rs = PlSeedableRandomStateQuality::fixed();
160
let mut h = vec![];
161
if self.0.vec_hash(rs, &mut h).is_ok() {
162
let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
163
h.hash(state)
164
} else {
165
self.len().hash(state);
166
self.null_count().hash(state);
167
self.dtype().hash(state);
168
}
169
}
170
}
171
172
impl Series {
173
/// Create a new empty Series.
174
pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Series {
175
Series::full_null(name, 0, dtype)
176
}
177
178
pub fn clear(&self) -> Series {
179
if self.is_empty() {
180
self.clone()
181
} else {
182
match self.dtype() {
183
#[cfg(feature = "object")]
184
DataType::Object(_) => self
185
.take(&ChunkedArray::<IdxType>::new_vec(PlSmallStr::EMPTY, vec![]))
186
.unwrap(),
187
dt => Series::new_empty(self.name().clone(), dt),
188
}
189
}
190
}
191
192
#[doc(hidden)]
193
pub fn _get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
194
if Arc::weak_count(&self.0) + Arc::strong_count(&self.0) != 1 {
195
self.0 = self.0.clone_inner();
196
}
197
Arc::get_mut(&mut self.0).expect("implementation error")
198
}
199
200
/// Take or clone a owned copy of the inner [`ChunkedArray`].
201
pub fn take_inner<T: PolarsPhysicalType>(self) -> ChunkedArray<T> {
202
let arc_any = self.0.as_arc_any();
203
let downcast = arc_any
204
.downcast::<implementations::SeriesWrap<ChunkedArray<T>>>()
205
.unwrap();
206
207
match Arc::try_unwrap(downcast) {
208
Ok(ca) => ca.0,
209
Err(ca) => ca.as_ref().as_ref().clone(),
210
}
211
}
212
213
/// # Safety
214
/// The caller must ensure the length and the data types of `ArrayRef` does not change.
215
/// And that the null_count is updated (e.g. with a `compute_len()`)
216
pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
217
#[allow(unused_mut)]
218
let mut ca = self._get_inner_mut();
219
ca.chunks_mut()
220
}
221
222
/// Consume the `Series` and return its chunks.
pub fn into_chunks(mut self) -> Vec<ArrayRef> {
    let inner = self._get_inner_mut();
    // The chunk vector is left empty and the length is recomputed right after.
    let taken = std::mem::take(unsafe { inner.chunks_mut() });
    inner.compute_len();
    taken
}
228
229
// TODO! this probably can now be removed, now we don't have special case for structs.
230
pub fn select_chunk(&self, i: usize) -> Self {
231
let mut new = self.clear();
232
let mut flags = self.get_flags();
233
234
use StatisticsFlags as F;
235
flags &= F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST;
236
237
// Assign mut so we go through arc only once.
238
let mut_new = new._get_inner_mut();
239
let chunks = unsafe { mut_new.chunks_mut() };
240
let chunk = self.chunks()[i].clone();
241
chunks.clear();
242
chunks.push(chunk);
243
mut_new.compute_len();
244
mut_new._set_flags(flags);
245
new
246
}
247
248
pub fn is_sorted_flag(&self) -> IsSorted {
249
if self.len() <= 1 {
250
return IsSorted::Ascending;
251
}
252
self.get_flags().is_sorted()
253
}
254
255
pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
256
let mut flags = self.get_flags();
257
flags.set_sorted(sorted);
258
self.set_flags(flags);
259
}
260
261
pub(crate) fn clear_flags(&mut self) {
262
self.set_flags(StatisticsFlags::empty());
263
}
264
pub fn get_flags(&self) -> StatisticsFlags {
265
self.0._get_flags()
266
}
267
268
pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
269
self._get_inner_mut()._set_flags(flags)
270
}
271
272
/// Wrap this `Series` in a single-column [`DataFrame`].
pub fn into_frame(self) -> DataFrame {
    let height = self.len();
    let columns = vec![self.into()];
    // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
    unsafe { DataFrame::new_no_checks(height, columns) }
}
276
277
/// Rename series.
278
pub fn rename(&mut self, name: PlSmallStr) -> &mut Series {
279
self._get_inner_mut().rename(name);
280
self
281
}
282
283
/// Return this Series with a new name.
284
pub fn with_name(mut self, name: PlSmallStr) -> Series {
285
self.rename(name);
286
self
287
}
288
289
pub fn from_arrow_chunks(name: PlSmallStr, arrays: Vec<ArrayRef>) -> PolarsResult<Series> {
290
Self::try_from((name, arrays))
291
}
292
293
pub fn from_arrow(name: PlSmallStr, array: ArrayRef) -> PolarsResult<Series> {
294
Self::try_from((name, array))
295
}
296
297
/// Shrink the capacity of this array to fit its length.
298
pub fn shrink_to_fit(&mut self) {
299
self._get_inner_mut().shrink_to_fit()
300
}
301
302
/// Append in place. This is done by adding the chunks of `other` to this [`Series`].
303
///
304
/// See [`ChunkedArray::append`] and [`ChunkedArray::extend`].
305
pub fn append(&mut self, other: &Series) -> PolarsResult<&mut Self> {
306
let must_cast = other.dtype().matches_schema_type(self.dtype())?;
307
if must_cast {
308
let other = other.cast(self.dtype())?;
309
self.append_owned(other)?;
310
} else {
311
self._get_inner_mut().append(other)?;
312
}
313
Ok(self)
314
}
315
316
/// Append in place. This is done by adding the chunks of `other` to this [`Series`].
317
///
318
/// See [`ChunkedArray::append_owned`] and [`ChunkedArray::extend`].
319
pub fn append_owned(&mut self, other: Series) -> PolarsResult<&mut Self> {
320
let must_cast = other.dtype().matches_schema_type(self.dtype())?;
321
if must_cast {
322
let other = other.cast(self.dtype())?;
323
self._get_inner_mut().append_owned(other)?;
324
} else {
325
self._get_inner_mut().append_owned(other)?;
326
}
327
Ok(self)
328
}
329
330
/// Redo a length and null_count compute
331
pub fn compute_len(&mut self) {
332
self._get_inner_mut().compute_len()
333
}
334
335
/// Extend the memory backed by this array with the values from `other`.
336
///
337
/// See [`ChunkedArray::extend`] and [`ChunkedArray::append`].
338
pub fn extend(&mut self, other: &Series) -> PolarsResult<&mut Self> {
339
let must_cast = other.dtype().matches_schema_type(self.dtype())?;
340
if must_cast {
341
let other = other.cast(self.dtype())?;
342
self._get_inner_mut().extend(&other)?;
343
} else {
344
self._get_inner_mut().extend(other)?;
345
}
346
Ok(self)
347
}
348
349
/// Sort the series with specific options.
350
///
351
/// # Example
352
///
353
/// ```rust
354
/// # use polars_core::prelude::*;
355
/// # fn main() -> PolarsResult<()> {
356
/// let s = Series::new("foo".into(), [2, 1, 3]);
357
/// let sorted = s.sort(SortOptions::default())?;
358
/// assert_eq!(sorted, Series::new("foo".into(), [1, 2, 3]));
359
/// # Ok(())
360
/// }
361
/// ```
362
///
363
/// See [`SortOptions`] for more options.
364
pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
365
self.sort_with(sort_options)
366
}
367
368
/// Only implemented for numeric types
369
pub fn as_single_ptr(&mut self) -> PolarsResult<usize> {
370
self._get_inner_mut().as_single_ptr()
371
}
372
373
pub fn cast(&self, dtype: &DataType) -> PolarsResult<Self> {
374
self.cast_with_options(dtype, CastOptions::NonStrict)
375
}
376
377
/// Cast [`Series`] to another [`DataType`].
378
pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
379
let slf = self
380
.trim_lists_to_normalized_offsets()
381
.map_or(Cow::Borrowed(self), Cow::Owned);
382
let slf = slf.propagate_nulls().map_or(slf, Cow::Owned);
383
384
use DataType as D;
385
let do_clone = match dtype {
386
D::Unknown(UnknownKind::Any | UnknownKind::Ufunc) => true,
387
D::Unknown(UnknownKind::Int(_)) if slf.dtype().is_integer() => true,
388
D::Unknown(UnknownKind::Float) if slf.dtype().is_float() => true,
389
D::Unknown(UnknownKind::Str)
390
if slf.dtype().is_string() | slf.dtype().is_categorical() =>
391
{
392
true
393
},
394
dt if dt.is_primitive() && dt == slf.dtype() => true,
395
_ => false,
396
};
397
398
if do_clone {
399
return Ok(slf.into_owned());
400
}
401
402
pub fn cast_dtype(dtype: &DataType) -> Option<DataType> {
403
match dtype {
404
D::Unknown(UnknownKind::Int(v)) => Some(materialize_dyn_int(*v).dtype()),
405
D::Unknown(UnknownKind::Float) => Some(DataType::Float64),
406
D::Unknown(UnknownKind::Str) => Some(DataType::String),
407
// Best leave as is.
408
D::List(inner) => cast_dtype(inner.as_ref()).map(Box::new).map(D::List),
409
#[cfg(feature = "dtype-struct")]
410
D::Struct(fields) => {
411
// @NOTE: We only allocate if we really need to.
412
413
let mut field_iter = fields.iter().enumerate();
414
let mut new_fields = loop {
415
let (i, field) = field_iter.next()?;
416
417
if let Some(dtype) = cast_dtype(&field.dtype) {
418
let mut new_fields = Vec::with_capacity(fields.len());
419
new_fields.extend(fields.iter().take(i).cloned());
420
new_fields.push(Field {
421
name: field.name.clone(),
422
dtype,
423
});
424
break new_fields;
425
}
426
};
427
428
new_fields.extend(fields.iter().skip(new_fields.len()).cloned().map(|field| {
429
let dtype = cast_dtype(&field.dtype).unwrap_or(field.dtype);
430
Field {
431
name: field.name,
432
dtype,
433
}
434
}));
435
436
Some(D::Struct(new_fields))
437
},
438
_ => None,
439
}
440
}
441
442
let mut casted = cast_dtype(dtype);
443
if dtype.is_list() && dtype.inner_dtype().is_some_and(|dt| dt.is_null()) {
444
if let Some(from_inner_dtype) = slf.dtype().inner_dtype() {
445
casted = Some(DataType::List(Box::new(from_inner_dtype.clone())));
446
}
447
}
448
let dtype = match casted {
449
None => dtype,
450
Some(ref dtype) => dtype,
451
};
452
453
// Always allow casting all nulls to other all nulls.
454
let len = slf.len();
455
if slf.null_count() == len {
456
return Ok(Series::full_null(slf.name().clone(), len, dtype));
457
}
458
459
let new_options = match options {
460
// Strictness is handled on this level to improve error messages.
461
CastOptions::Strict => CastOptions::NonStrict,
462
opt => opt,
463
};
464
465
let out = slf.0.cast(dtype, new_options)?;
466
if options.is_strict() {
467
handle_casting_failures(slf.as_ref(), &out)?;
468
}
469
Ok(out)
470
}
471
472
/// Cast from physical to logical types without any checks on the validity of the cast.
473
///
474
/// # Safety
475
///
476
/// This can lead to invalid memory access in downstream code.
477
pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
478
match self.dtype() {
479
#[cfg(feature = "dtype-struct")]
480
DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype),
481
DataType::List(_) => self.list().unwrap().cast_unchecked(dtype),
482
dt if dt.is_primitive_numeric() => {
483
with_match_physical_numeric_polars_type!(dt, |$T| {
484
let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref();
485
ca.cast_unchecked(dtype)
486
})
487
},
488
DataType::Binary => self.binary().unwrap().cast_unchecked(dtype),
489
_ => self.cast_with_options(dtype, CastOptions::Overflowing),
490
}
491
}
492
493
/// Convert a non-logical series back into a logical series without casting.
494
///
495
/// # Safety
496
///
497
/// This can lead to invalid memory access in downstream code.
498
pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Self> {
499
debug_assert!(!self.dtype().is_logical(), "{:?}", self.dtype());
500
501
if self.dtype() == dtype {
502
return Ok(self.clone());
503
}
504
505
use DataType as D;
506
match (self.dtype(), dtype) {
507
#[cfg(feature = "dtype-decimal")]
508
(D::Int128, D::Decimal(precision, scale)) => {
509
self.clone().into_decimal(*precision, scale.unwrap())
510
},
511
512
#[cfg(feature = "dtype-categorical")]
513
(phys, D::Categorical(cats, _)) if &cats.physical().dtype() == phys => {
514
with_match_categorical_physical_type!(cats.physical(), |$C| {
515
type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
516
let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
517
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
518
ca.clone(),
519
dtype.clone(),
520
)
521
.into_series())
522
})
523
},
524
#[cfg(feature = "dtype-categorical")]
525
(phys, D::Enum(fcats, _)) if &fcats.physical().dtype() == phys => {
526
with_match_categorical_physical_type!(fcats.physical(), |$C| {
527
type CA = ChunkedArray<<$C as PolarsCategoricalType>::PolarsPhysical>;
528
let ca = self.as_ref().as_any().downcast_ref::<CA>().unwrap();
529
Ok(CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(
530
ca.clone(),
531
dtype.clone(),
532
)
533
.into_series())
534
})
535
},
536
537
(D::Int32, D::Date) => feature_gated!("dtype-time", Ok(self.clone().into_date())),
538
(D::Int64, D::Datetime(tu, tz)) => feature_gated!(
539
"dtype-datetime",
540
Ok(self.clone().into_datetime(*tu, tz.clone()))
541
),
542
(D::Int64, D::Duration(tu)) => {
543
feature_gated!("dtype-duration", Ok(self.clone().into_duration(*tu)))
544
},
545
(D::Int64, D::Time) => feature_gated!("dtype-time", Ok(self.clone().into_time())),
546
547
(D::List(_), D::List(to)) => unsafe {
548
self.list()
549
.unwrap()
550
.from_physical_unchecked(to.as_ref().clone())
551
.map(|ca| ca.into_series())
552
},
553
#[cfg(feature = "dtype-array")]
554
(D::Array(_, lw), D::Array(to, rw)) if lw == rw => unsafe {
555
self.array()
556
.unwrap()
557
.from_physical_unchecked(to.as_ref().clone())
558
.map(|ca| ca.into_series())
559
},
560
#[cfg(feature = "dtype-struct")]
561
(D::Struct(_), D::Struct(to)) => unsafe {
562
self.struct_()
563
.unwrap()
564
.from_physical_unchecked(to.as_slice())
565
.map(|ca| ca.into_series())
566
},
567
568
_ => panic!("invalid from_physical({dtype:?}) for {:?}", self.dtype()),
569
}
570
}
571
572
/// Cast numerical types to f64, and keep floats as is.
573
pub fn to_float(&self) -> PolarsResult<Series> {
574
match self.dtype() {
575
DataType::Float32 | DataType::Float64 => Ok(self.clone()),
576
_ => self.cast_with_options(&DataType::Float64, CastOptions::Overflowing),
577
}
578
}
579
580
/// Compute the sum of all values in this Series.
581
/// Returns `Some(0)` if the array is empty, and `None` if the array only
582
/// contains null values.
583
///
584
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
585
/// first cast to `Int64` to prevent overflow issues.
586
pub fn sum<T>(&self) -> PolarsResult<T>
587
where
588
T: NumCast,
589
{
590
let sum = self.sum_reduce()?;
591
let sum = sum.value().extract().unwrap();
592
Ok(sum)
593
}
594
595
/// Returns the minimum value in the array, according to the natural order.
596
/// Returns an option because the array is nullable.
597
pub fn min<T>(&self) -> PolarsResult<Option<T>>
598
where
599
T: NumCast,
600
{
601
let min = self.min_reduce()?;
602
let min = min.value().extract::<T>();
603
Ok(min)
604
}
605
606
/// Returns the maximum value in the array, according to the natural order.
607
/// Returns an option because the array is nullable.
608
pub fn max<T>(&self) -> PolarsResult<Option<T>>
609
where
610
T: NumCast,
611
{
612
let max = self.max_reduce()?;
613
let max = max.value().extract::<T>();
614
Ok(max)
615
}
616
617
/// Explode a list Series. This expands every item to a new row..
618
pub fn explode(&self, skip_empty: bool) -> PolarsResult<Series> {
619
match self.dtype() {
620
DataType::List(_) => self.list().unwrap().explode(skip_empty),
621
#[cfg(feature = "dtype-array")]
622
DataType::Array(_, _) => self.array().unwrap().explode(skip_empty),
623
_ => Ok(self.clone()),
624
}
625
}
626
627
/// Check if numeric value is NaN (note this is different than missing/ null)
628
pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
629
match self.dtype() {
630
DataType::Float32 => Ok(self.f32().unwrap().is_nan()),
631
DataType::Float64 => Ok(self.f64().unwrap().is_nan()),
632
DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
633
dt if dt.is_primitive_numeric() => {
634
let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
635
.with_validity(self.rechunk_validity());
636
Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
637
},
638
_ => polars_bail!(opq = is_nan, self.dtype()),
639
}
640
}
641
642
/// Check if numeric value is NaN (note this is different than missing/null)
643
pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
644
match self.dtype() {
645
DataType::Float32 => Ok(self.f32().unwrap().is_not_nan()),
646
DataType::Float64 => Ok(self.f64().unwrap().is_not_nan()),
647
dt if dt.is_primitive_numeric() => {
648
let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
649
.with_validity(self.rechunk_validity());
650
Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
651
},
652
_ => polars_bail!(opq = is_not_nan, self.dtype()),
653
}
654
}
655
656
/// Check if numeric value is finite
657
pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
658
match self.dtype() {
659
DataType::Float32 => Ok(self.f32().unwrap().is_finite()),
660
DataType::Float64 => Ok(self.f64().unwrap().is_finite()),
661
DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
662
dt if dt.is_primitive_numeric() => {
663
let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean)
664
.with_validity(self.rechunk_validity());
665
Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
666
},
667
_ => polars_bail!(opq = is_finite, self.dtype()),
668
}
669
}
670
671
/// Check if numeric value is infinite
672
pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
673
match self.dtype() {
674
DataType::Float32 => Ok(self.f32().unwrap().is_infinite()),
675
DataType::Float64 => Ok(self.f64().unwrap().is_infinite()),
676
DataType::Null => Ok(BooleanChunked::full_null(self.name().clone(), self.len())),
677
dt if dt.is_primitive_numeric() => {
678
let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean)
679
.with_validity(self.rechunk_validity());
680
Ok(BooleanChunked::with_chunk(self.name().clone(), arr))
681
},
682
_ => polars_bail!(opq = is_infinite, self.dtype()),
683
}
684
}
685
686
/// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
/// from `other` where the mask evaluates `false`. This function automatically broadcasts unit
/// length inputs.
///
/// Both inputs are first coerced with `coerce_lhs_rhs`.
#[cfg(feature = "zip_with")]
pub fn zip_with(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
    let (left, right) = coerce_lhs_rhs(self, other)?;
    left.zip_with_same_type(mask, right.as_ref())
}
694
695
/// Converts a Series to their physical representation, if they have one,
696
/// otherwise the series is left unchanged.
697
///
698
/// * Date -> Int32
699
/// * Datetime -> Int64
700
/// * Duration -> Int64
701
/// * Decimal -> Int128
702
/// * Time -> Int64
703
/// * Categorical -> U8/U16/U32
704
/// * List(inner) -> List(physical of inner)
705
/// * Array(inner) -> Array(physical of inner)
706
/// * Struct -> Struct with physical repr of each struct column
707
pub fn to_physical_repr(&self) -> Cow<'_, Series> {
708
use DataType::*;
709
match self.dtype() {
710
// NOTE: Don't use cast here, as it might rechunk (if all nulls)
711
// which is not allowed in a phys repr.
712
#[cfg(feature = "dtype-date")]
713
Date => Cow::Owned(self.date().unwrap().phys.clone().into_series()),
714
#[cfg(feature = "dtype-datetime")]
715
Datetime(_, _) => Cow::Owned(self.datetime().unwrap().phys.clone().into_series()),
716
#[cfg(feature = "dtype-duration")]
717
Duration(_) => Cow::Owned(self.duration().unwrap().phys.clone().into_series()),
718
#[cfg(feature = "dtype-time")]
719
Time => Cow::Owned(self.time().unwrap().phys.clone().into_series()),
720
#[cfg(feature = "dtype-categorical")]
721
dt @ (Categorical(_, _) | Enum(_, _)) => {
722
with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
723
let ca = self.cat::<$C>().unwrap();
724
Cow::Owned(ca.physical().clone().into_series())
725
})
726
},
727
#[cfg(feature = "dtype-decimal")]
728
Decimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()),
729
List(_) => match self.list().unwrap().to_physical_repr() {
730
Cow::Borrowed(_) => Cow::Borrowed(self),
731
Cow::Owned(ca) => Cow::Owned(ca.into_series()),
732
},
733
#[cfg(feature = "dtype-array")]
734
Array(_, _) => match self.array().unwrap().to_physical_repr() {
735
Cow::Borrowed(_) => Cow::Borrowed(self),
736
Cow::Owned(ca) => Cow::Owned(ca.into_series()),
737
},
738
#[cfg(feature = "dtype-struct")]
739
Struct(_) => match self.struct_().unwrap().to_physical_repr() {
740
Cow::Borrowed(_) => Cow::Borrowed(self),
741
Cow::Owned(ca) => Cow::Owned(ca.into_series()),
742
},
743
_ => Cow::Borrowed(self),
744
}
745
}
746
747
/// Traverse and collect every nth element in a new array.
748
pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Series> {
749
polars_ensure!(n > 0, ComputeError: "cannot perform gather every for `n=0`");
750
let idx = ((offset as IdxSize)..self.len() as IdxSize)
751
.step_by(n)
752
.collect_ca(PlSmallStr::EMPTY);
753
// SAFETY: we stay in-bounds.
754
Ok(unsafe { self.take_unchecked(&idx) })
755
}
756
757
/// Dot product: multiply element-wise with `other` and sum the result as `f64`.
#[cfg(feature = "dot_product")]
pub fn dot(&self, other: &Series) -> PolarsResult<f64> {
    let products = std::ops::Mul::mul(self, other)?;
    products.sum::<f64>()
}
761
762
/// Get the sum of the Series as a new Series of length 1.
763
/// Returns a Series with a single zeroed entry if self is an empty numeric series.
764
///
765
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
766
/// first cast to `Int64` to prevent overflow issues.
767
pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
768
use DataType::*;
769
match self.dtype() {
770
Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_reduce(),
771
_ => self.0.sum_reduce(),
772
}
773
}
774
775
/// Get the product of an array.
776
///
777
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
778
/// first cast to `Int64` to prevent overflow issues.
779
pub fn product(&self) -> PolarsResult<Scalar> {
780
#[cfg(feature = "product")]
781
{
782
use DataType::*;
783
match self.dtype() {
784
Boolean => self.cast(&DataType::Int64).unwrap().product(),
785
Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
786
let s = self.cast(&Int64).unwrap();
787
s.product()
788
},
789
Int64 => Ok(self.i64().unwrap().prod_reduce()),
790
UInt64 => Ok(self.u64().unwrap().prod_reduce()),
791
#[cfg(feature = "dtype-i128")]
792
Int128 => Ok(self.i128().unwrap().prod_reduce()),
793
Float32 => Ok(self.f32().unwrap().prod_reduce()),
794
Float64 => Ok(self.f64().unwrap().prod_reduce()),
795
dt => {
796
polars_bail!(InvalidOperation: "`product` operation not supported for dtype `{dt}`")
797
},
798
}
799
}
800
#[cfg(not(feature = "product"))]
801
{
802
panic!("activate 'product' feature")
803
}
804
}
805
806
/// Cast throws an error if conversion had overflows
807
pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Series> {
808
self.cast_with_options(dtype, CastOptions::Strict)
809
}
810
811
/// Turn an `Int128` series into a decimal series with the given precision and scale.
///
/// A decimal series is returned unchanged when its scale equals `scale` and
/// the precisions are equal or at least one of them is `None`.
///
/// # Panics
///
/// Panics for every other dtype.
#[cfg(feature = "dtype-decimal")]
pub(crate) fn into_decimal(
    self,
    precision: Option<usize>,
    scale: usize,
) -> PolarsResult<Series> {
    match self.dtype() {
        DataType::Int128 => {
            let phys = self.i128().unwrap().clone();
            let decimal = phys.into_decimal(precision, scale)?;
            Ok(decimal.into_series())
        },
        DataType::Decimal(cur_prec, cur_scale)
            if (cur_prec.is_none() || precision.is_none() || *cur_prec == precision)
                && *cur_scale == Some(scale) =>
        {
            Ok(self)
        },
        dt => panic!("into_decimal({precision:?}, {scale}) not implemented for {dt:?}"),
    }
}
833
834
/// Turn an `Int64` series into a `Time` series; a `Time` series is rebuilt
/// from a clone of its physical array.
///
/// # Panics
///
/// Panics for every other dtype.
#[cfg(feature = "dtype-time")]
pub(crate) fn into_time(self) -> Series {
    match self.dtype() {
        DataType::Int64 => self.i64().unwrap().clone().into_time().into_series(),
        DataType::Time => self
            .time()
            .unwrap()
            .physical()
            .clone()
            .into_time()
            .into_series(),
        // Name this function in the message, like `into_datetime`/`into_duration` do.
        dt => panic!("into_time not implemented for {dt:?}"),
    }
}
848
849
pub(crate) fn into_date(self) -> Series {
850
#[cfg(not(feature = "dtype-date"))]
851
{
852
panic!("activate feature dtype-date")
853
}
854
#[cfg(feature = "dtype-date")]
855
match self.dtype() {
856
DataType::Int32 => self.i32().unwrap().clone().into_date().into_series(),
857
DataType::Date => self
858
.date()
859
.unwrap()
860
.physical()
861
.clone()
862
.into_date()
863
.into_series(),
864
dt => panic!("date not implemented for {dt:?}"),
865
}
866
}
867
868
#[allow(unused_variables)]
869
pub(crate) fn into_datetime(self, timeunit: TimeUnit, tz: Option<TimeZone>) -> Series {
870
#[cfg(not(feature = "dtype-datetime"))]
871
{
872
panic!("activate feature dtype-datetime")
873
}
874
875
#[cfg(feature = "dtype-datetime")]
876
match self.dtype() {
877
DataType::Int64 => self
878
.i64()
879
.unwrap()
880
.clone()
881
.into_datetime(timeunit, tz)
882
.into_series(),
883
DataType::Datetime(_, _) => self
884
.datetime()
885
.unwrap()
886
.physical()
887
.clone()
888
.into_datetime(timeunit, tz)
889
.into_series(),
890
dt => panic!("into_datetime not implemented for {dt:?}"),
891
}
892
}
893
894
#[allow(unused_variables)]
895
pub(crate) fn into_duration(self, timeunit: TimeUnit) -> Series {
896
#[cfg(not(feature = "dtype-duration"))]
897
{
898
panic!("activate feature dtype-duration")
899
}
900
#[cfg(feature = "dtype-duration")]
901
match self.dtype() {
902
DataType::Int64 => self
903
.i64()
904
.unwrap()
905
.clone()
906
.into_duration(timeunit)
907
.into_series(),
908
DataType::Duration(_) => self
909
.duration()
910
.unwrap()
911
.physical()
912
.clone()
913
.into_duration(timeunit)
914
.into_series(),
915
dt => panic!("into_duration not implemented for {dt:?}"),
916
}
917
}
918
919
// used for formatting
920
pub fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
921
Ok(self.0.get(index)?.str_value())
922
}
923
/// Get the head of the Series.
924
pub fn head(&self, length: Option<usize>) -> Series {
925
let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
926
self.slice(0, std::cmp::min(len, self.len()))
927
}
928
929
/// Get the tail of the Series.
930
pub fn tail(&self, length: Option<usize>) -> Series {
931
let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
932
let len = std::cmp::min(len, self.len());
933
self.slice(-(len as i64), len)
934
}
935
936
pub fn mean_reduce(&self) -> Scalar {
937
crate::scalar::reduce::mean_reduce(self.mean(), self.dtype().clone())
938
}
939
940
/// Compute the unique elements, but maintain order. This requires more work
941
/// than a naive [`Series::unique`](SeriesTrait::unique).
942
pub fn unique_stable(&self) -> PolarsResult<Series> {
943
let idx = self.arg_unique()?;
944
// SAFETY: Indices are in bounds.
945
unsafe { Ok(self.take_unchecked(&idx)) }
946
}
947
948
pub fn try_idx(&self) -> Option<&IdxCa> {
949
#[cfg(feature = "bigidx")]
950
{
951
self.try_u64()
952
}
953
#[cfg(not(feature = "bigidx"))]
954
{
955
self.try_u32()
956
}
957
}
958
959
pub fn idx(&self) -> PolarsResult<&IdxCa> {
960
#[cfg(feature = "bigidx")]
961
{
962
self.u64()
963
}
964
#[cfg(not(feature = "bigidx"))]
965
{
966
self.u32()
967
}
968
}
969
970
/// Returns an estimation of the total (heap) allocated size of the `Series` in bytes.
971
///
972
/// # Implementation
973
/// This estimation is the sum of the size of its buffers, validity, including nested arrays.
974
/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
975
/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
976
///
977
/// When an array is sliced, its allocated size remains constant because the buffer unchanged.
978
/// However, this function will yield a smaller number. This is because this function returns
979
/// the visible size of the buffer, not its total capacity.
980
///
981
/// FFI buffers are included in this estimation.
982
pub fn estimated_size(&self) -> usize {
983
let mut size = 0;
984
match self.dtype() {
985
// TODO @ cat-rework: include mapping size here?
986
#[cfg(feature = "object")]
987
DataType::Object(_) => {
988
let ArrowDataType::FixedSizeBinary(size) = self.chunks()[0].dtype() else {
989
unreachable!()
990
};
991
// This is only the pointer size in python. So will be a huge underestimation.
992
return self.len() * *size;
993
},
994
_ => {},
995
}
996
997
size += self
998
.chunks()
999
.iter()
1000
.map(|arr| estimated_bytes_size(&**arr))
1001
.sum::<usize>();
1002
1003
size
1004
}
1005
1006
/// Packs every element into a list.
1007
pub fn as_list(&self) -> ListChunked {
1008
let s = self.rechunk();
1009
// don't use `to_arrow` as we need the physical types
1010
let values = s.chunks()[0].clone();
1011
let offsets = (0i64..(s.len() as i64 + 1)).collect::<Vec<_>>();
1012
let offsets = unsafe { Offsets::new_unchecked(offsets) };
1013
1014
let dtype = LargeListArray::default_datatype(
1015
s.dtype().to_physical().to_arrow(CompatLevel::newest()),
1016
);
1017
let new_arr = LargeListArray::new(dtype, offsets.into(), values, None);
1018
let mut out = ListChunked::with_chunk(s.name().clone(), new_arr);
1019
out.set_inner_dtype(s.dtype().clone());
1020
out
1021
}
1022
1023
pub fn row_encode_unordered(&self) -> PolarsResult<BinaryOffsetChunked> {
1024
row_encode::_get_rows_encoded_ca_unordered(
1025
self.name().clone(),
1026
&[self.clone().into_column()],
1027
)
1028
}
1029
1030
pub fn row_encode_ordered(
1031
&self,
1032
descending: bool,
1033
nulls_last: bool,
1034
) -> PolarsResult<BinaryOffsetChunked> {
1035
row_encode::_get_rows_encoded_ca(
1036
self.name().clone(),
1037
&[self.clone().into_column()],
1038
&[descending],
1039
&[nulls_last],
1040
)
1041
}
1042
}
1043
1044
impl Deref for Series {
1045
type Target = dyn SeriesTrait;
1046
1047
fn deref(&self) -> &Self::Target {
1048
self.0.as_ref()
1049
}
1050
}
1051
1052
/// Borrows the `dyn SeriesTrait` wrapped by a [`Series`].
impl<'a> AsRef<dyn SeriesTrait + 'a> for Series {
    fn as_ref(&self) -> &(dyn SeriesTrait + 'a) {
        let inner: &(dyn SeriesTrait + 'a) = self.0.as_ref();
        inner
    }
}
1057
1058
impl Default for Series {
1059
fn default() -> Self {
1060
Int64Chunked::default().into_series()
1061
}
1062
}
1063
1064
/// Borrows the underlying [`ChunkedArray<T>`] of a `dyn SeriesTrait`.
///
/// # Panics
/// Panics when the value behind `as_any` is not a `ChunkedArray<T>`.
impl<T: PolarsPhysicalType> AsRef<ChunkedArray<T>> for dyn SeriesTrait + '_ {
    fn as_ref(&self) -> &ChunkedArray<T> {
        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
        // Logical (so not the SeriesWrap).
        match self.as_any().downcast_ref::<ChunkedArray<T>>() {
            Some(ca) => ca,
            None => panic!(
                "implementation error, cannot get ref {:?} from {:?}",
                T::get_static_dtype(),
                self.dtype()
            ),
        }
    }
}
1079
1080
/// Mutably borrows the underlying [`ChunkedArray<T>`] of a `dyn SeriesTrait`.
///
/// # Panics
/// Panics when the value behind `as_any_mut` is not a `ChunkedArray<T>`.
impl<T: PolarsPhysicalType> AsMut<ChunkedArray<T>> for dyn SeriesTrait + '_ {
    fn as_mut(&mut self) -> &mut ChunkedArray<T> {
        // The type is checked in a separate step so that `self.dtype()` can still be
        // read for the panic message before the mutable downcast borrows `self`.
        assert!(
            self.as_any_mut().is::<ChunkedArray<T>>(),
            "implementation error, cannot get ref {:?} from {:?}",
            T::get_static_dtype(),
            self.dtype()
        );

        // @NOTE: SeriesTrait `as_any` returns a std::any::Any for the underlying ChunkedArray /
        // Logical (so not the SeriesWrap).
        self.as_any_mut().downcast_mut::<ChunkedArray<T>>().unwrap()
    }
}
1095
1096
#[cfg(test)]
mod test {
    use crate::prelude::*;
    use crate::series::*;

    /// Casting a `UInt32` series yields a series of the requested dtype.
    #[test]
    fn cast() {
        let uints = UInt32Chunked::new("a".into(), &[1, 2]).into_series();

        let as_i64 = uints.cast(&DataType::Int64).unwrap();
        assert!(as_i64.i64().is_ok());

        let as_f32 = uints.cast(&DataType::Float32).unwrap();
        assert!(as_f32.f32().is_ok());
    }

    /// The basic constructors can be called without panicking.
    #[test]
    fn new_series() {
        let _ = Series::new("boolean series".into(), &vec![true, false, true]);
        let _ = Series::new("int series".into(), &[1, 2, 3]);
        let _ = Int32Chunked::new("a".into(), &[1, 2, 3]).into_series();
    }

    /// A physical `Int32` list converted to a logical `Date` list reports `List(Date)`.
    #[test]
    #[cfg(feature = "dtype-date")]
    fn roundtrip_list_logical_20311() {
        let list_dtype = ArrowDataType::LargeList(Box::new(ArrowField::new(
            LIST_VALUES_NAME,
            ArrowDataType::Int32,
            true,
        )));
        let values = PrimitiveArray::new(ArrowDataType::Int32, vec![1i32].into(), None).to_boxed();
        let arr = ListArray::new(
            list_dtype,
            unsafe { Offsets::new_unchecked(vec![0, 1]) }.into(),
            values,
            None,
        );

        let physical = ListChunked::from_chunk_iter(PlSmallStr::from_static("a"), [arr]);
        let logical = unsafe { physical.from_physical_unchecked(DataType::Date) }.unwrap();
        assert_eq!(logical.dtype(), &DataType::List(Box::new(DataType::Date)));
    }

    /// Building a series from field-less struct values keeps the row count.
    #[test]
    #[cfg(feature = "dtype-struct")]
    fn new_series_from_empty_structs() {
        let dtype = DataType::Struct(vec![]);
        let rows = vec![AnyValue::StructOwned(Box::new((vec![], vec![]))); 3];
        let series = Series::from_any_values_and_dtype("".into(), &rows, &dtype, false).unwrap();
        assert_eq!(series.len(), 3);
    }

    /// A boxed arrow primitive array can be turned into a series.
    #[test]
    fn new_series_from_arrow_primitive_array() {
        let array_ref: ArrayRef = Box::new(UInt32Array::from_slice([1, 2, 3, 4, 5]));

        let _ = Series::try_new("foo".into(), array_ref).unwrap();
    }

    /// Appending a same-typed series grows the target; a mismatched dtype is an error.
    #[test]
    fn series_append() {
        let mut target = Series::new("a".into(), &[1, 2]);
        let same_type = Series::new("b".into(), &[3]);
        target.append(&same_type).unwrap();
        assert_eq!(target.len(), 3);

        // add wrong type
        let wrong_type = Series::new("b".into(), &[3.0]);
        assert!(target.append(&wrong_type).is_err())
    }

    /// Appending / extending decimals of different scale keeps the target's scale.
    #[test]
    #[cfg(feature = "dtype-decimal")]
    fn series_append_decimal() {
        let scale_2 = Series::new("a".into(), &[1.1, 2.3])
            .cast(&DataType::Decimal(None, Some(2)))
            .unwrap();
        let scale_0 = Series::new("b".into(), &[3])
            .cast(&DataType::Decimal(None, Some(0)))
            .unwrap();

        let mut appended = scale_2.clone();
        appended.append(&scale_0).unwrap();
        assert_eq!(appended.len(), 3);
        assert_eq!(appended.get(2).unwrap(), AnyValue::Decimal(300, 2));

        let mut extended = scale_0;
        extended.extend(&scale_2).unwrap();
        assert_eq!(extended.get(2).unwrap(), AnyValue::Decimal(2, 0));
    }

    /// Negative and zero offsets select the expected first element.
    #[test]
    fn series_slice_works() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        // (offset, length, expected first value)
        for (offset, length, first) in [(-3, 3, 3), (-5, 5, 1), (0, 5, 1)] {
            let sliced = series.slice(offset, length);
            assert_eq!(sliced.get(0).unwrap(), AnyValue::Int64(first));
        }
    }

    /// Slices reaching past either end of the series must not panic.
    #[test]
    fn out_of_range_slice_does_not_panic() {
        let series = Series::new("a".into(), &[1i64, 2, 3, 4, 5]);

        for (offset, length) in [(-3, 4), (-6, 2), (4, 2)] {
            let _ = series.slice(offset, length);
        }
    }
}
1214
1215