Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/column/mod.rs
8446 views
1
use std::borrow::Cow;
2
3
use arrow::bitmap::{Bitmap, BitmapBuilder};
4
use arrow::trusted_len::TrustMyLength;
5
use num_traits::{Num, NumCast};
6
use polars_compute::rolling::QuantileMethod;
7
use polars_error::PolarsResult;
8
use polars_utils::aliases::PlSeedableRandomStateQuality;
9
use polars_utils::index::check_bounds;
10
use polars_utils::pl_str::PlSmallStr;
11
pub use scalar::ScalarColumn;
12
13
use self::compare_inner::{TotalEqInner, TotalOrdInner};
14
use self::gather::check_bounds_ca;
15
use self::series::SeriesColumn;
16
use crate::chunked_array::cast::CastOptions;
17
use crate::chunked_array::flags::StatisticsFlags;
18
use crate::datatypes::ReshapeDimension;
19
use crate::prelude::*;
20
use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
21
use crate::utils::{Container, slice_offsets};
22
use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
23
24
mod arithmetic;
25
mod compare;
26
mod scalar;
27
mod series;
28
29
/// A column within a [`DataFrame`].
30
///
31
/// This is lazily initialized to a [`Series`] with methods like
32
/// [`as_materialized_series`][Column::as_materialized_series] and
33
/// [`take_materialized_series`][Column::take_materialized_series].
34
///
35
/// Currently, there are two ways to represent a [`Column`].
36
/// 1. A [`Series`] of values
37
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
38
#[derive(Debug, Clone)]
39
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
40
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
41
pub enum Column {
42
Series(SeriesColumn),
43
Scalar(ScalarColumn),
44
}
45
46
/// Convert `Self` into a [`Column`]
47
pub trait IntoColumn: Sized {
48
fn into_column(self) -> Column;
49
}
50
51
impl Column {
52
#[inline]
53
#[track_caller]
54
pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
55
where
56
Phantom: ?Sized,
57
Series: NamedFrom<T, Phantom>,
58
{
59
Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
60
}
61
62
#[inline]
63
pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
64
Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
65
}
66
67
#[inline]
68
pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
69
Self::Scalar(ScalarColumn::new(name, scalar, length))
70
}
71
72
pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
73
let Ok(length) = IdxSize::try_from(length) else {
74
polars_bail!(
75
ComputeError:
76
"row index length {} overflows IdxSize::MAX ({})",
77
length,
78
IdxSize::MAX,
79
)
80
};
81
82
if offset.checked_add(length).is_none() {
83
polars_bail!(
84
ComputeError:
85
"row index with offset {} overflows on dataframe with height {}",
86
offset, length
87
)
88
}
89
90
let range = offset..offset + length;
91
92
let mut ca = IdxCa::from_vec(name, range.collect());
93
ca.set_sorted_flag(IsSorted::Ascending);
94
let col = ca.into_series().into();
95
96
Ok(col)
97
}
98
99
// # Materialize
100
/// Get a reference to a [`Series`] for this [`Column`]
101
///
102
/// This may need to materialize the [`Series`] on the first invocation for a specific column.
103
#[inline]
104
pub fn as_materialized_series(&self) -> &Series {
105
match self {
106
Column::Series(s) => s,
107
Column::Scalar(s) => s.as_materialized_series(),
108
}
109
}
110
111
/// If the memory repr of this Column is a scalar, a unit-length Series will
112
/// be returned.
113
#[inline]
114
pub fn as_materialized_series_maintain_scalar(&self) -> Series {
115
match self {
116
Column::Scalar(s) => s.as_single_value_series(),
117
v => v.as_materialized_series().clone(),
118
}
119
}
120
121
/// Returns the backing `Series` for the values of this column.
122
///
123
/// * For `Column::Series` columns, simply returns the inner `Series`.
124
/// * For `Column::Scalar` columns, returns an empty or unit length series.
125
///
126
/// # Note
127
/// This method is safe to use. However, care must be taken when operating on the returned
128
/// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
129
/// on it, however e.g. aggregations will return unspecified results.
130
pub fn _get_backing_series(&self) -> Series {
131
match self {
132
Column::Series(s) => (**s).clone(),
133
Column::Scalar(s) => s.as_single_value_series(),
134
}
135
}
136
137
/// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
138
/// the values.
139
///
140
/// # Panics
141
/// Panics if:
142
/// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
143
/// * `self` is `Column::Scalar` and if either:
144
/// * `self` is not empty and `new_s` is not of unit length.
145
/// * `self` is empty and `new_s` is not empty.
146
pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
147
match self {
148
Column::Series(s) => {
149
assert_eq!(new_s.len(), s.len());
150
Column::Series(SeriesColumn::new(new_s))
151
},
152
Column::Scalar(s) => {
153
assert_eq!(new_s.len(), s.as_single_value_series().len());
154
Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
155
},
156
}
157
}
158
159
/// Turn [`Column`] into a [`Column::Series`].
160
///
161
/// This may need to materialize the [`Series`] on the first invocation for a specific column.
162
#[inline]
163
pub fn into_materialized_series(&mut self) -> &mut Series {
164
match self {
165
Column::Series(s) => s,
166
Column::Scalar(s) => {
167
let series = std::mem::replace(
168
s,
169
ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
170
)
171
.take_materialized_series();
172
*self = Column::Series(series.into());
173
let Column::Series(s) = self else {
174
unreachable!();
175
};
176
s
177
},
178
}
179
}
180
/// Take [`Series`] from a [`Column`]
181
///
182
/// This may need to materialize the [`Series`] on the first invocation for a specific column.
183
#[inline]
184
pub fn take_materialized_series(self) -> Series {
185
match self {
186
Column::Series(s) => s.take(),
187
Column::Scalar(s) => s.take_materialized_series(),
188
}
189
}
190
191
#[inline]
192
pub fn dtype(&self) -> &DataType {
193
match self {
194
Column::Series(s) => s.dtype(),
195
Column::Scalar(s) => s.dtype(),
196
}
197
}
198
199
#[inline]
200
pub fn field(&self) -> Cow<'_, Field> {
201
match self {
202
Column::Series(s) => s.field(),
203
Column::Scalar(s) => match s.lazy_as_materialized_series() {
204
None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
205
Some(s) => s.field(),
206
},
207
}
208
}
209
210
#[inline]
211
pub fn name(&self) -> &PlSmallStr {
212
match self {
213
Column::Series(s) => s.name(),
214
Column::Scalar(s) => s.name(),
215
}
216
}
217
218
#[inline]
219
pub fn len(&self) -> usize {
220
match self {
221
Column::Series(s) => s.len(),
222
Column::Scalar(s) => s.len(),
223
}
224
}
225
226
#[inline]
227
pub fn with_name(mut self, name: PlSmallStr) -> Column {
228
self.rename(name);
229
self
230
}
231
232
#[inline]
233
pub fn rename(&mut self, name: PlSmallStr) {
234
match self {
235
Column::Series(s) => _ = s.rename(name),
236
Column::Scalar(s) => _ = s.rename(name),
237
}
238
}
239
240
// # Downcasting
241
#[inline]
242
pub fn as_series(&self) -> Option<&Series> {
243
match self {
244
Column::Series(s) => Some(s),
245
_ => None,
246
}
247
}
248
#[inline]
249
pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
250
match self {
251
Column::Scalar(s) => Some(s),
252
_ => None,
253
}
254
}
255
#[inline]
256
pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
257
match self {
258
Column::Scalar(s) => Some(s),
259
_ => None,
260
}
261
}
262
263
// # Try to Chunked Arrays
264
/// Delegates to `Series::try_bool` on the materialized series.
pub fn try_bool(&self) -> Option<&BooleanChunked> {
    let series = self.as_materialized_series();
    series.try_bool()
}
267
/// Delegates to `Series::try_i8` on the materialized series.
pub fn try_i8(&self) -> Option<&Int8Chunked> {
    let series = self.as_materialized_series();
    series.try_i8()
}
270
/// Delegates to `Series::try_i16` on the materialized series.
pub fn try_i16(&self) -> Option<&Int16Chunked> {
    let series = self.as_materialized_series();
    series.try_i16()
}
273
/// Delegates to `Series::try_i32` on the materialized series.
pub fn try_i32(&self) -> Option<&Int32Chunked> {
    let series = self.as_materialized_series();
    series.try_i32()
}
276
/// Delegates to `Series::try_i64` on the materialized series.
pub fn try_i64(&self) -> Option<&Int64Chunked> {
    let series = self.as_materialized_series();
    series.try_i64()
}
279
/// Delegates to `Series::try_u8` on the materialized series.
pub fn try_u8(&self) -> Option<&UInt8Chunked> {
    let series = self.as_materialized_series();
    series.try_u8()
}
282
/// Delegates to `Series::try_u16` on the materialized series.
pub fn try_u16(&self) -> Option<&UInt16Chunked> {
    let series = self.as_materialized_series();
    series.try_u16()
}
285
/// Delegates to `Series::try_u32` on the materialized series.
pub fn try_u32(&self) -> Option<&UInt32Chunked> {
    let series = self.as_materialized_series();
    series.try_u32()
}
288
/// Delegates to `Series::try_u64` on the materialized series.
pub fn try_u64(&self) -> Option<&UInt64Chunked> {
    let series = self.as_materialized_series();
    series.try_u64()
}
291
/// Delegates to `Series::try_u128` on the materialized series.
#[cfg(feature = "dtype-u128")]
pub fn try_u128(&self) -> Option<&UInt128Chunked> {
    let series = self.as_materialized_series();
    series.try_u128()
}
295
/// Delegates to `Series::try_f16` on the materialized series.
#[cfg(feature = "dtype-f16")]
pub fn try_f16(&self) -> Option<&Float16Chunked> {
    let series = self.as_materialized_series();
    series.try_f16()
}
299
/// Delegates to `Series::try_f32` on the materialized series.
pub fn try_f32(&self) -> Option<&Float32Chunked> {
    let series = self.as_materialized_series();
    series.try_f32()
}
302
/// Delegates to `Series::try_f64` on the materialized series.
pub fn try_f64(&self) -> Option<&Float64Chunked> {
    let series = self.as_materialized_series();
    series.try_f64()
}
305
/// Delegates to `Series::try_str` on the materialized series.
pub fn try_str(&self) -> Option<&StringChunked> {
    let series = self.as_materialized_series();
    series.try_str()
}
308
/// Delegates to `Series::try_list` on the materialized series.
pub fn try_list(&self) -> Option<&ListChunked> {
    let series = self.as_materialized_series();
    series.try_list()
}
311
/// Delegates to `Series::try_binary` on the materialized series.
pub fn try_binary(&self) -> Option<&BinaryChunked> {
    let series = self.as_materialized_series();
    series.try_binary()
}
314
/// Delegates to `Series::try_idx` on the materialized series.
pub fn try_idx(&self) -> Option<&IdxCa> {
    let series = self.as_materialized_series();
    series.try_idx()
}
317
/// Delegates to `Series::try_binary_offset` on the materialized series.
pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
    let series = self.as_materialized_series();
    series.try_binary_offset()
}
320
/// Delegates to `Series::try_datetime` on the materialized series.
#[cfg(feature = "dtype-datetime")]
pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
    let series = self.as_materialized_series();
    series.try_datetime()
}
324
/// Delegates to `Series::try_struct` on the materialized series.
#[cfg(feature = "dtype-struct")]
pub fn try_struct(&self) -> Option<&StructChunked> {
    let series = self.as_materialized_series();
    series.try_struct()
}
328
/// Delegates to `Series::try_decimal` on the materialized series.
#[cfg(feature = "dtype-decimal")]
pub fn try_decimal(&self) -> Option<&DecimalChunked> {
    let series = self.as_materialized_series();
    series.try_decimal()
}
332
/// Delegates to `Series::try_array` on the materialized series.
#[cfg(feature = "dtype-array")]
pub fn try_array(&self) -> Option<&ArrayChunked> {
    let series = self.as_materialized_series();
    series.try_array()
}
336
/// Delegates to `Series::try_cat::<T>` on the materialized series.
#[cfg(feature = "dtype-categorical")]
pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
    let series = self.as_materialized_series();
    series.try_cat::<T>()
}
340
/// Delegates to `Series::try_cat8` on the materialized series.
#[cfg(feature = "dtype-categorical")]
pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
    let series = self.as_materialized_series();
    series.try_cat8()
}
344
/// Delegates to `Series::try_cat16` on the materialized series.
#[cfg(feature = "dtype-categorical")]
pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
    let series = self.as_materialized_series();
    series.try_cat16()
}
348
/// Delegates to `Series::try_cat32` on the materialized series.
#[cfg(feature = "dtype-categorical")]
pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
    let series = self.as_materialized_series();
    series.try_cat32()
}
352
/// Delegates to `Series::try_date` on the materialized series.
#[cfg(feature = "dtype-date")]
pub fn try_date(&self) -> Option<&DateChunked> {
    let series = self.as_materialized_series();
    series.try_date()
}
356
/// Delegates to `Series::try_duration` on the materialized series.
#[cfg(feature = "dtype-duration")]
pub fn try_duration(&self) -> Option<&DurationChunked> {
    let series = self.as_materialized_series();
    series.try_duration()
}
360
361
// # To Chunked Arrays
362
/// Delegates to `Series::bool` on the materialized series and forwards its result.
pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
    let series = self.as_materialized_series();
    series.bool()
}
365
/// Delegates to `Series::i8` on the materialized series and forwards its result.
pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
    let series = self.as_materialized_series();
    series.i8()
}
368
/// Delegates to `Series::i16` on the materialized series and forwards its result.
pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
    let series = self.as_materialized_series();
    series.i16()
}
371
/// Delegates to `Series::i32` on the materialized series and forwards its result.
pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
    let series = self.as_materialized_series();
    series.i32()
}
374
/// Delegates to `Series::i64` on the materialized series and forwards its result.
pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
    let series = self.as_materialized_series();
    series.i64()
}
377
/// Delegates to `Series::i128` on the materialized series and forwards its result.
#[cfg(feature = "dtype-i128")]
pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
    let series = self.as_materialized_series();
    series.i128()
}
381
/// Delegates to `Series::u8` on the materialized series and forwards its result.
pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
    let series = self.as_materialized_series();
    series.u8()
}
384
/// Delegates to `Series::u16` on the materialized series and forwards its result.
pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
    let series = self.as_materialized_series();
    series.u16()
}
387
/// Delegates to `Series::u32` on the materialized series and forwards its result.
pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
    let series = self.as_materialized_series();
    series.u32()
}
390
/// Delegates to `Series::u64` on the materialized series and forwards its result.
pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
    let series = self.as_materialized_series();
    series.u64()
}
393
/// Delegates to `Series::u128` on the materialized series and forwards its result.
#[cfg(feature = "dtype-u128")]
pub fn u128(&self) -> PolarsResult<&UInt128Chunked> {
    let series = self.as_materialized_series();
    series.u128()
}
397
/// Delegates to `Series::f16` on the materialized series and forwards its result.
#[cfg(feature = "dtype-f16")]
pub fn f16(&self) -> PolarsResult<&Float16Chunked> {
    let series = self.as_materialized_series();
    series.f16()
}
401
/// Delegates to `Series::f32` on the materialized series and forwards its result.
pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
    let series = self.as_materialized_series();
    series.f32()
}
404
/// Delegates to `Series::f64` on the materialized series and forwards its result.
pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
    let series = self.as_materialized_series();
    series.f64()
}
407
/// Delegates to `Series::str` on the materialized series and forwards its result.
pub fn str(&self) -> PolarsResult<&StringChunked> {
    let series = self.as_materialized_series();
    series.str()
}
410
/// Delegates to `Series::list` on the materialized series and forwards its result.
pub fn list(&self) -> PolarsResult<&ListChunked> {
    let series = self.as_materialized_series();
    series.list()
}
413
/// Delegates to `Series::binary` on the materialized series and forwards its result.
pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
    let series = self.as_materialized_series();
    series.binary()
}
416
/// Delegates to `Series::idx` on the materialized series and forwards its result.
pub fn idx(&self) -> PolarsResult<&IdxCa> {
    let series = self.as_materialized_series();
    series.idx()
}
419
/// Delegates to `Series::binary_offset` on the materialized series and forwards its result.
pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
    let series = self.as_materialized_series();
    series.binary_offset()
}
422
/// Delegates to `Series::datetime` on the materialized series and forwards its result.
#[cfg(feature = "dtype-datetime")]
pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
    let series = self.as_materialized_series();
    series.datetime()
}
426
/// Delegates to `Series::struct_` on the materialized series and forwards its result.
#[cfg(feature = "dtype-struct")]
pub fn struct_(&self) -> PolarsResult<&StructChunked> {
    let series = self.as_materialized_series();
    series.struct_()
}
430
/// Delegates to `Series::decimal` on the materialized series and forwards its result.
#[cfg(feature = "dtype-decimal")]
pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
    let series = self.as_materialized_series();
    series.decimal()
}
434
/// Delegates to `Series::array` on the materialized series and forwards its result.
#[cfg(feature = "dtype-array")]
pub fn array(&self) -> PolarsResult<&ArrayChunked> {
    let series = self.as_materialized_series();
    series.array()
}
438
/// Delegates to `Series::cat::<T>` on the materialized series and forwards its result.
#[cfg(feature = "dtype-categorical")]
pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
    let series = self.as_materialized_series();
    series.cat::<T>()
}
442
/// Delegates to `Series::cat8` on the materialized series and forwards its result.
#[cfg(feature = "dtype-categorical")]
pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
    let series = self.as_materialized_series();
    series.cat8()
}
446
/// Delegates to `Series::cat16` on the materialized series and forwards its result.
#[cfg(feature = "dtype-categorical")]
pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
    let series = self.as_materialized_series();
    series.cat16()
}
450
/// Delegates to `Series::cat32` on the materialized series and forwards its result.
#[cfg(feature = "dtype-categorical")]
pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
    let series = self.as_materialized_series();
    series.cat32()
}
454
/// Delegates to `Series::date` on the materialized series and forwards its result.
#[cfg(feature = "dtype-date")]
pub fn date(&self) -> PolarsResult<&DateChunked> {
    let series = self.as_materialized_series();
    series.date()
}
458
/// Delegates to `Series::duration` on the materialized series and forwards its result.
#[cfg(feature = "dtype-duration")]
pub fn duration(&self) -> PolarsResult<&DurationChunked> {
    let series = self.as_materialized_series();
    series.duration()
}
462
463
// # Casting
464
pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
465
match self {
466
Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
467
Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
468
}
469
}
470
pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
471
match self {
472
Column::Series(s) => s.strict_cast(dtype).map(Column::from),
473
Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
474
}
475
}
476
pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
477
match self {
478
Column::Series(s) => s.cast(dtype).map(Column::from),
479
Column::Scalar(s) => s.cast(dtype).map(Column::from),
480
}
481
}
482
/// # Safety
483
///
484
/// This can lead to invalid memory access in downstream code.
485
pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
486
match self {
487
Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
488
Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
489
}
490
}
491
492
#[must_use]
493
pub fn clear(&self) -> Self {
494
match self {
495
Column::Series(s) => s.clear().into(),
496
Column::Scalar(s) => s.resize(0).into(),
497
}
498
}
499
500
#[inline]
501
pub fn shrink_to_fit(&mut self) {
502
match self {
503
Column::Series(s) => s.shrink_to_fit(),
504
Column::Scalar(_) => {},
505
}
506
}
507
508
#[inline]
509
pub fn new_from_index(&self, index: usize, length: usize) -> Self {
510
if index >= self.len() {
511
return Self::full_null(self.name().clone(), length, self.dtype());
512
}
513
514
match self {
515
Column::Series(s) => {
516
// SAFETY: Bounds check done before.
517
let av = unsafe { s.get_unchecked(index) };
518
let scalar = Scalar::new(self.dtype().clone(), av.into_static());
519
Self::new_scalar(self.name().clone(), scalar, length)
520
},
521
Column::Scalar(s) => s.resize(length).into(),
522
}
523
}
524
525
#[inline]
526
pub fn has_nulls(&self) -> bool {
527
match self {
528
Self::Series(s) => s.has_nulls(),
529
Self::Scalar(s) => s.has_nulls(),
530
}
531
}
532
533
#[inline]
534
pub fn is_null(&self) -> BooleanChunked {
535
match self {
536
Self::Series(s) => s.is_null(),
537
Self::Scalar(s) => {
538
BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
539
},
540
}
541
}
542
#[inline]
543
pub fn is_not_null(&self) -> BooleanChunked {
544
match self {
545
Self::Series(s) => s.is_not_null(),
546
Self::Scalar(s) => {
547
BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
548
},
549
}
550
}
551
552
pub fn to_physical_repr(&self) -> Column {
553
// @scalar-opt
554
self.as_materialized_series()
555
.to_physical_repr()
556
.into_owned()
557
.into()
558
}
559
/// # Safety
560
///
561
/// This can lead to invalid memory access in downstream code.
562
pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
563
// @scalar-opt
564
self.as_materialized_series()
565
.from_physical_unchecked(dtype)
566
.map(Column::from)
567
}
568
569
/// Returns the first `length` rows (default `HEAD_DEFAULT_LENGTH`), capped at the column
/// length.
pub fn head(&self, length: Option<usize>) -> Column {
    let requested = length.unwrap_or(HEAD_DEFAULT_LENGTH);
    let capped = requested.min(self.len());
    self.slice(0, capped)
}
574
/// Returns the last `length` rows (default `TAIL_DEFAULT_LENGTH`), capped at the column
/// length.
pub fn tail(&self, length: Option<usize>) -> Column {
    let requested = length.unwrap_or(TAIL_DEFAULT_LENGTH);
    let capped = requested.min(self.len());
    debug_assert!(capped <= i64::MAX as usize);
    // A negative offset is handed to `slice` to address rows from the end.
    let from_end = -(capped as i64);
    self.slice(from_end, capped)
}
580
pub fn slice(&self, offset: i64, length: usize) -> Column {
581
match self {
582
Column::Series(s) => s.slice(offset, length).into(),
583
Column::Scalar(s) => {
584
let (_, length) = slice_offsets(offset, length, s.len());
585
s.resize(length).into()
586
},
587
}
588
}
589
590
pub fn split_at(&self, offset: i64) -> (Column, Column) {
591
// @scalar-opt
592
let (l, r) = self.as_materialized_series().split_at(offset);
593
(l.into(), r.into())
594
}
595
596
#[inline]
597
pub fn null_count(&self) -> usize {
598
match self {
599
Self::Series(s) => s.null_count(),
600
Self::Scalar(s) if s.scalar().is_null() => s.len(),
601
Self::Scalar(_) => 0,
602
}
603
}
604
605
/// Gathers the rows at `indices` after checking them against the column length.
///
/// # Errors
/// Propagates the error of `check_bounds_ca` when the check fails.
pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
    let bound = self.len() as IdxSize;
    check_bounds_ca(indices, bound)?;
    // SAFETY: the indices were bounds-checked just above.
    let gathered = unsafe { self.take_unchecked(indices) };
    Ok(gathered)
}
609
/// Gathers the rows at `indices` after checking them against the column length.
///
/// # Errors
/// Propagates the error of `check_bounds` when the check fails.
pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
    let bound = self.len() as IdxSize;
    check_bounds(indices, bound)?;
    // SAFETY: the indices were bounds-checked just above.
    let gathered = unsafe { self.take_slice_unchecked(indices) };
    Ok(gathered)
}
613
/// # Safety
614
///
615
/// No bounds on the indexes are performed.
616
pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
617
debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
618
619
match self {
620
Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
621
Self::Scalar(s) => {
622
let idxs_length = indices.len();
623
let idxs_null_count = indices.null_count();
624
625
let scalar = ScalarColumn::from_single_value_series(
626
s.as_single_value_series().take_unchecked(&IdxCa::new(
627
indices.name().clone(),
628
&[0][..s.len().min(1)],
629
)),
630
idxs_length,
631
);
632
633
// We need to make sure that null values in `idx` become null values in the result
634
if idxs_null_count == 0 || scalar.has_nulls() {
635
scalar.into_column()
636
} else if idxs_null_count == idxs_length {
637
scalar.into_nulls().into_column()
638
} else {
639
let validity = indices.rechunk_validity();
640
let series = scalar.take_materialized_series();
641
let name = series.name().clone();
642
let dtype = series.dtype().clone();
643
let mut chunks = series.into_chunks();
644
assert_eq!(chunks.len(), 1);
645
chunks[0] = chunks[0].with_validity(validity);
646
unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
647
.into_column()
648
}
649
},
650
}
651
}
652
/// # Safety
653
///
654
/// No bounds on the indexes are performed.
655
pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
656
debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
657
658
match self {
659
Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
660
Self::Scalar(s) => ScalarColumn::from_single_value_series(
661
s.as_single_value_series()
662
.take_slice_unchecked(&[0][..s.len().min(1)]),
663
indices.len(),
664
)
665
.into(),
666
}
667
}
668
669
/// Shared implementation for aggregations that map a non-missing scalar onto itself.
///
/// Series columns are handed straight to `series_agg`. For a non-empty scalar column the
/// aggregation is probed once on the single-value series to learn the output dtype and
/// whether it is defined; the scalar is then repeated per group, with empty groups nulled.
#[inline(always)]
#[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
fn agg_with_scalar_identity(
    &self,
    groups: &GroupsType,
    series_agg: impl Fn(&Series, &GroupsType) -> Series,
) -> Column {
    let scalar = match self {
        Column::Series(series) => return series_agg(series, groups).into_column(),
        Column::Scalar(scalar) => scalar,
    };

    if scalar.is_empty() {
        return series_agg(scalar.as_materialized_series(), groups).into_column();
    }

    // Run the aggregation on the single value to find out:
    // 1. which datatype the aggregation outputs
    // 2. whether the aggregation is defined at all
    // @NOTE: this group is always valid since the scalar column is non-empty.
    let probe_groups = GroupsType::new_slice(vec![[0, 1]], false, true);
    let probe = series_agg(&scalar.as_single_value_series(), &probe_groups);

    // An undefined aggregation yields nulls for every group.
    if probe.has_nulls() {
        let null_scalar = Scalar::new(probe.dtype().clone(), AnyValue::Null);
        return Self::new_scalar(probe.name().clone(), null_scalar, groups.len());
    }

    let mut repeated = scalar.resize(groups.len());
    // The aggregation may change the type (e.g. mean turns int into float), so cast to the
    // probed output type when it differs.
    if probe.dtype() != scalar.dtype() {
        repeated = repeated.cast(probe.dtype()).unwrap();
    }

    let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
        // Fast path: without empty groups the scalar can be kept as is.
        return repeated.into_column();
    };

    // Every empty group must produce a *missing* (`null`) value.
    let mut validity = BitmapBuilder::with_capacity(groups.len());
    validity.extend_constant(first_empty_idx, true);
    // SAFETY: We trust the length of this iterator.
    let remaining = unsafe {
        TrustMyLength::new(
            groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
            groups.len() - first_empty_idx,
        )
    };
    validity.extend_trusted_len_iter(remaining);

    let mut out = repeated.take_materialized_series().rechunk();
    // SAFETY: We perform a compute_len afterwards.
    let chunks = unsafe { out.chunks_mut() };
    let first_chunk = &mut chunks[0];
    *first_chunk = first_chunk.with_validity(validity.into_opt_validity());
    out.compute_len();

    out.into_column()
}
737
738
/// Per-group `agg_min`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_min(grp) }
    })
}
745
746
/// Per-group `agg_max`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_max(grp) }
    })
}
753
754
/// Per-group `agg_mean`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_mean(grp) }
    })
}
761
762
/// Per-group `agg_arg_min`.
///
/// A scalar column short-circuits: the answer is index 0, or null when the column is empty
/// or holds nulls.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_arg_min(&self, groups: &GroupsType) -> Self {
    let scalar_col = match self {
        // SAFETY: the obligations of this function are passed on to the caller.
        Column::Series(series) => return unsafe { Column::from(series.agg_arg_min(groups)) },
        Column::Scalar(scalar_col) => scalar_col,
    };

    let has_value = !scalar_col.is_empty() && !scalar_col.has_nulls();
    let scalar = if has_value {
        Scalar::new_idxsize(0)
    } else {
        Scalar::null(IDX_DTYPE)
    };
    // NOTE(review): the scalar path returns a column of length 1 and ignores `groups`,
    // whereas the other scalar aggregations produce `groups.len()` rows — confirm whether
    // callers rely on this or whether it should be one value per group.
    Column::new_scalar(self.name().clone(), scalar, 1)
}
779
780
/// Per-group `agg_arg_max`.
///
/// A scalar column short-circuits: the answer is index 0, or null when the column is empty
/// or holds nulls.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_arg_max(&self, groups: &GroupsType) -> Self {
    let scalar_col = match self {
        // SAFETY: the obligations of this function are passed on to the caller.
        Column::Series(series) => return unsafe { Column::from(series.agg_arg_max(groups)) },
        Column::Scalar(scalar_col) => scalar_col,
    };

    let has_value = !scalar_col.is_empty() && !scalar_col.has_nulls();
    let scalar = if has_value {
        Scalar::new_idxsize(0)
    } else {
        Scalar::null(IDX_DTYPE)
    };
    // NOTE(review): the scalar path returns a column of length 1 and ignores `groups`,
    // whereas the other scalar aggregations produce `groups.len()` rows — confirm whether
    // callers rely on this or whether it should be one value per group.
    Column::new_scalar(self.name().clone(), scalar, 1)
}
797
798
/// Per-group `agg_sum`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_sum(groups) };
    Column::from(aggregated)
}
806
807
/// Per-group `agg_first`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_first(grp) }
    })
}
814
815
/// Per-group `agg_first_non_null`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_first_non_null(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_first_non_null(grp) }
    })
}
822
823
/// Per-group `agg_last`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_last(grp) }
    })
}
830
831
/// Per-group `agg_last_non_null`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_last_non_null(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_last_non_null(grp) }
    })
}
838
839
/// Per-group `agg_n_unique`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_n_unique(groups) };
    Column::from(aggregated)
}
847
848
/// Per-group `agg_quantile` for `quantile` and `method`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_quantile(
    &self,
    groups: &GroupsType,
    quantile: f64,
    method: QuantileMethod,
) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_quantile(groups, quantile, method) };
    Column::from(aggregated)
}
866
867
/// Per-group `agg_median`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_median(grp) }
    })
}
874
875
/// Per-group `agg_var` with the given `ddof`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_var(groups, ddof) };
    Column::from(aggregated)
}
883
884
/// Per-group `agg_std` with the given `ddof`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_std(groups, ddof) };
    Column::from(aggregated)
}
892
893
/// Per-group `agg_list`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "algorithm_group_by")]
pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_list(groups) };
    Column::from(aggregated)
}
901
902
/// Per-group `agg_valid_count`, computed on the materialized series.
///
/// NOTE(review): unlike its `agg_*` siblings this function is not declared `unsafe`, yet it
/// forwards `groups` to an `unsafe` aggregation. No bounds checks are done here, so `groups`
/// must still be correct for this column. Consider declaring it `unsafe fn` or validating
/// `groups` before the call.
#[cfg(feature = "algorithm_group_by")]
pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: not established locally; this relies on the caller passing groups that are
    // valid for this column (see the review note above).
    let counted = unsafe { series.agg_valid_count(groups) };
    Column::from(counted)
}
910
911
/// Per-group `agg_and`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "bitwise")]
pub unsafe fn agg_and(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_and(grp) }
    })
}
918
/// Per-group `agg_or`, routed through `agg_with_scalar_identity`.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "bitwise")]
pub unsafe fn agg_or(&self, groups: &GroupsType) -> Self {
    self.agg_with_scalar_identity(groups, |series, grp| {
        // SAFETY: the obligations of this function are passed on to the caller.
        unsafe { series.agg_or(grp) }
    })
}
925
/// Per-group `agg_xor`, computed on the materialized series.
///
/// # Safety
///
/// Does no bounds checks, groups must be correct.
#[cfg(feature = "bitwise")]
pub unsafe fn agg_xor(&self, groups: &GroupsType) -> Self {
    // @scalar-opt
    let series = self.as_materialized_series();
    // SAFETY: the obligations of this function are passed on to the caller.
    let aggregated = unsafe { series.agg_xor(groups) };
    Column::from(aggregated)
}
933
934
pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
935
Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
936
}
937
938
pub fn is_empty(&self) -> bool {
939
self.len() == 0
940
}
941
942
pub fn reverse(&self) -> Column {
943
match self {
944
Column::Series(s) => s.reverse().into(),
945
Column::Scalar(_) => self.clone(),
946
}
947
}
948
949
pub fn equals(&self, other: &Column) -> bool {
950
// @scalar-opt
951
self.as_materialized_series()
952
.equals(other.as_materialized_series())
953
}
954
955
pub fn equals_missing(&self, other: &Column) -> bool {
956
// @scalar-opt
957
self.as_materialized_series()
958
.equals_missing(other.as_materialized_series())
959
}
960
961
pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
962
// @scalar-opt
963
match self {
964
Column::Series(s) => s.set_sorted_flag(sorted),
965
Column::Scalar(_) => {},
966
}
967
}
968
969
pub fn get_flags(&self) -> StatisticsFlags {
970
match self {
971
Column::Series(s) => s.get_flags(),
972
Column::Scalar(_) => {
973
StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
974
},
975
}
976
}
977
978
/// Returns whether the flags were set
979
pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
980
match self {
981
Column::Series(s) => {
982
s.set_flags(flags);
983
true
984
},
985
Column::Scalar(_) => false,
986
}
987
}
988
989
pub fn vec_hash(
990
&self,
991
build_hasher: PlSeedableRandomStateQuality,
992
buf: &mut Vec<u64>,
993
) -> PolarsResult<()> {
994
// @scalar-opt?
995
self.as_materialized_series().vec_hash(build_hasher, buf)
996
}
997
998
pub fn vec_hash_combine(
999
&self,
1000
build_hasher: PlSeedableRandomStateQuality,
1001
hashes: &mut [u64],
1002
) -> PolarsResult<()> {
1003
// @scalar-opt?
1004
self.as_materialized_series()
1005
.vec_hash_combine(build_hasher, hashes)
1006
}
1007
1008
/// Appends `other` to this column; `self` is converted to a series-backed column first.
///
/// # Errors
/// Propagates the error of `Series::append`.
pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
    // @scalar-opt
    let target = self.into_materialized_series();
    target.append(other.as_materialized_series())?;
    Ok(self)
}
1014
/// Appends `other` by value via the series-level `append_owned`.
///
/// Returns `self` on success; errors from the series-level call are propagated.
pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
    let lhs = self.into_materialized_series();
    lhs.append_owned(other.take_materialized_series())?;
    Ok(self)
}
1019
1020
pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
1021
if self.is_empty() {
1022
return IdxCa::from_vec(self.name().clone(), Vec::new());
1023
}
1024
1025
if self.null_count() == self.len() {
1026
// We might need to maintain order so just respect the descending parameter.
1027
let values = if options.descending {
1028
(0..self.len() as IdxSize).rev().collect()
1029
} else {
1030
(0..self.len() as IdxSize).collect()
1031
};
1032
1033
return IdxCa::from_vec(self.name().clone(), values);
1034
}
1035
1036
let is_sorted = Some(self.is_sorted_flag());
1037
let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
1038
return self.as_materialized_series().arg_sort(options);
1039
};
1040
1041
// Fast path: the data is sorted.
1042
let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
1043
let invert = options.descending != is_sorted_dsc;
1044
1045
let mut values = Vec::with_capacity(self.len());
1046
1047
#[inline(never)]
1048
fn extend(
1049
start: IdxSize,
1050
end: IdxSize,
1051
slf: &Column,
1052
values: &mut Vec<IdxSize>,
1053
is_only_nulls: bool,
1054
invert: bool,
1055
maintain_order: bool,
1056
) {
1057
debug_assert!(start <= end);
1058
debug_assert!(start as usize <= slf.len());
1059
debug_assert!(end as usize <= slf.len());
1060
1061
if !invert || is_only_nulls {
1062
values.extend(start..end);
1063
return;
1064
}
1065
1066
// If we don't have to maintain order but we have to invert. Just flip it around.
1067
if !maintain_order {
1068
values.extend((start..end).rev());
1069
return;
1070
}
1071
1072
// If we want to maintain order but we also needs to invert, we need to invert
1073
// per group of items.
1074
//
1075
// @NOTE: Since the column is sorted, arg_unique can also take a fast path and
1076
// just do a single traversal.
1077
let arg_unique = slf
1078
.slice(start as i64, (end - start) as usize)
1079
.arg_unique()
1080
.unwrap();
1081
1082
assert!(!arg_unique.has_nulls());
1083
1084
let num_unique = arg_unique.len();
1085
1086
// Fast path: all items are unique.
1087
if num_unique == (end - start) as usize {
1088
values.extend((start..end).rev());
1089
return;
1090
}
1091
1092
if num_unique == 1 {
1093
values.extend(start..end);
1094
return;
1095
}
1096
1097
let mut prev_idx = end - start;
1098
for chunk in arg_unique.downcast_iter() {
1099
for &idx in chunk.values().as_slice().iter().rev() {
1100
values.extend(start + idx..start + prev_idx);
1101
prev_idx = idx;
1102
}
1103
}
1104
}
1105
macro_rules! extend {
1106
($start:expr, $end:expr) => {
1107
extend!($start, $end, is_only_nulls = false);
1108
};
1109
($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
1110
extend(
1111
$start,
1112
$end,
1113
self,
1114
&mut values,
1115
$is_only_nulls,
1116
invert,
1117
options.maintain_order,
1118
);
1119
};
1120
}
1121
1122
let length = self.len() as IdxSize;
1123
let null_count = self.null_count() as IdxSize;
1124
1125
if null_count == 0 {
1126
extend!(0, length);
1127
} else {
1128
let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
1129
match (options.nulls_last, has_nulls_last) {
1130
(true, true) => {
1131
// Current: Nulls last, Wanted: Nulls last
1132
extend!(0, length - null_count);
1133
extend!(length - null_count, length, is_only_nulls = true);
1134
},
1135
(true, false) => {
1136
// Current: Nulls first, Wanted: Nulls last
1137
extend!(null_count, length);
1138
extend!(0, null_count, is_only_nulls = true);
1139
},
1140
(false, true) => {
1141
// Current: Nulls last, Wanted: Nulls first
1142
extend!(length - null_count, length, is_only_nulls = true);
1143
extend!(0, length - null_count);
1144
},
1145
(false, false) => {
1146
// Current: Nulls first, Wanted: Nulls first
1147
extend!(0, null_count, is_only_nulls = true);
1148
extend!(null_count, length);
1149
},
1150
}
1151
}
1152
1153
// @NOTE: This can theoretically be pushed into the previous operation but it is really
1154
// worth it... probably not...
1155
if let Some(limit) = options.limit {
1156
let limit = limit.min(length);
1157
values.truncate(limit as usize);
1158
}
1159
1160
IdxCa::from_vec(self.name().clone(), values)
1161
}
1162
1163
pub fn arg_sort_multiple(
1164
&self,
1165
by: &[Column],
1166
options: &SortMultipleOptions,
1167
) -> PolarsResult<IdxCa> {
1168
// @scalar-opt
1169
self.as_materialized_series().arg_sort_multiple(by, options)
1170
}
1171
1172
pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1173
match self {
1174
Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1175
_ => self.as_materialized_series().arg_unique(),
1176
}
1177
}
1178
1179
/// Forwards to `bit_repr` of the materialized series.
pub fn bit_repr(&self) -> Option<BitRepr> {
    // @scalar-opt
    let series = self.as_materialized_series();
    series.bit_repr()
}
1183
1184
/// Wraps this column in a [`DataFrame`] that contains only this column.
pub fn into_frame(self) -> DataFrame {
    let height = self.len();
    let columns = vec![self];
    // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
    unsafe { DataFrame::new_unchecked(height, columns) }
}
1188
1189
/// Extends this column with `other` via the series-level `extend`.
///
/// Returns `self` on success; errors from the series-level call are propagated.
pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
    // @scalar-opt
    let lhs = self.into_materialized_series();
    lhs.extend(other.as_materialized_series())?;
    Ok(self)
}
1195
1196
pub fn rechunk(&self) -> Column {
1197
match self {
1198
Column::Series(s) => s.rechunk().into(),
1199
Column::Scalar(s) => {
1200
if s.lazy_as_materialized_series()
1201
.filter(|x| x.n_chunks() > 1)
1202
.is_some()
1203
{
1204
Column::Scalar(ScalarColumn::new(
1205
s.name().clone(),
1206
s.scalar().clone(),
1207
s.len(),
1208
))
1209
} else {
1210
self.clone()
1211
}
1212
},
1213
}
1214
}
1215
1216
pub fn explode(&self, options: ExplodeOptions) -> PolarsResult<Column> {
1217
self.as_materialized_series()
1218
.explode(options)
1219
.map(Column::from)
1220
}
1221
/// Forwards to `implode` of the materialized series.
pub fn implode(&self) -> PolarsResult<ListChunked> {
    let series = self.as_materialized_series();
    series.implode()
}
1224
1225
pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1226
// @scalar-opt
1227
self.as_materialized_series()
1228
.fill_null(strategy)
1229
.map(Column::from)
1230
}
1231
1232
pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1233
// @scalar-opt
1234
self.as_materialized_series()
1235
.divide(rhs.as_materialized_series())
1236
.map(Column::from)
1237
}
1238
1239
/// Calls `shift(periods)` on the materialized series and converts the result into a
/// [`Column`].
pub fn shift(&self, periods: i64) -> Column {
    // @scalar-opt
    let shifted = self.as_materialized_series().shift(periods);
    shifted.into()
}
1243
1244
/// Calls the series-level `zip_with` with `mask` and `other`, wrapping the result in a
/// [`Column`].
#[cfg(feature = "zip_with")]
pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
    // @scalar-opt
    let lhs = self.as_materialized_series();
    let zipped = lhs.zip_with(mask, other.as_materialized_series())?;
    Ok(Self::from(zipped))
}
1251
1252
/// Calls the series-level `zip_with_same_type` with `mask` and `other`, wrapping the
/// result in a [`Column`].
#[cfg(feature = "zip_with")]
pub fn zip_with_same_type(
    &self,
    mask: &ChunkedArray<BooleanType>,
    other: &Column,
) -> PolarsResult<Column> {
    // @scalar-opt
    let lhs = self.as_materialized_series();
    let zipped = lhs.zip_with_same_type(mask, other.as_materialized_series())?;
    Ok(Column::from(zipped))
}
1263
1264
pub fn drop_nulls(&self) -> Column {
1265
match self {
1266
Column::Series(s) => s.drop_nulls().into_column(),
1267
Column::Scalar(s) => s.drop_nulls().into_column(),
1268
}
1269
}
1270
1271
/// Packs every element into a list.
1272
pub fn as_list(&self) -> ListChunked {
1273
// @scalar-opt
1274
self.as_materialized_series().as_list()
1275
}
1276
1277
pub fn is_sorted_flag(&self) -> IsSorted {
1278
match self {
1279
Column::Series(s) => s.is_sorted_flag(),
1280
Column::Scalar(_) => IsSorted::Ascending,
1281
}
1282
}
1283
1284
pub fn unique(&self) -> PolarsResult<Column> {
1285
match self {
1286
Column::Series(s) => s.unique().map(Column::from),
1287
Column::Scalar(s) => {
1288
_ = s.as_single_value_series().unique()?;
1289
if s.is_empty() {
1290
return Ok(s.clone().into_column());
1291
}
1292
1293
Ok(s.resize(1).into_column())
1294
},
1295
}
1296
}
1297
pub fn unique_stable(&self) -> PolarsResult<Column> {
1298
match self {
1299
Column::Series(s) => s.unique_stable().map(Column::from),
1300
Column::Scalar(s) => {
1301
_ = s.as_single_value_series().unique_stable()?;
1302
if s.is_empty() {
1303
return Ok(s.clone().into_column());
1304
}
1305
1306
Ok(s.resize(1).into_column())
1307
},
1308
}
1309
}
1310
1311
pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1312
// @scalar-opt
1313
self.as_materialized_series()
1314
.reshape_list(dimensions)
1315
.map(Self::from)
1316
}
1317
1318
/// Calls `reshape_array(dimensions)` on the materialized series and wraps the result in a
/// [`Column`].
#[cfg(feature = "dtype-array")]
pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
    // @scalar-opt
    let reshaped = self.as_materialized_series().reshape_array(dimensions)?;
    Ok(Self::from(reshaped))
}
1325
1326
pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1327
// @scalar-opt
1328
self.as_materialized_series()
1329
.sort(sort_options)
1330
.map(Self::from)
1331
}
1332
1333
pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1334
match self {
1335
Column::Series(s) => s.filter(filter).map(Column::from),
1336
Column::Scalar(s) => {
1337
if s.is_empty() {
1338
return Ok(s.clone().into_column());
1339
}
1340
1341
// Broadcasting
1342
if filter.len() == 1 {
1343
return match filter.get(0) {
1344
Some(true) => Ok(s.clone().into_column()),
1345
_ => Ok(s.resize(0).into_column()),
1346
};
1347
}
1348
1349
Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1350
},
1351
}
1352
}
1353
1354
/// Calls `shuffle(seed)` on the materialized series and converts the result into a
/// [`Column`].
#[cfg(feature = "random")]
pub fn shuffle(&self, seed: Option<u64>) -> Self {
    // @scalar-opt
    let shuffled = self.as_materialized_series().shuffle(seed);
    shuffled.into()
}
1359
1360
/// Calls the series-level `sample_frac` with the given arguments and wraps the result in a
/// [`Column`].
#[cfg(feature = "random")]
pub fn sample_frac(
    &self,
    frac: f64,
    with_replacement: bool,
    shuffle: bool,
    seed: Option<u64>,
) -> PolarsResult<Self> {
    let series = self.as_materialized_series();
    let sampled = series.sample_frac(frac, with_replacement, shuffle, seed)?;
    Ok(Self::from(sampled))
}
1372
1373
/// Calls the series-level `sample_n` with the given arguments and wraps the result in a
/// [`Column`].
#[cfg(feature = "random")]
pub fn sample_n(
    &self,
    n: usize,
    with_replacement: bool,
    shuffle: bool,
    seed: Option<u64>,
) -> PolarsResult<Self> {
    let series = self.as_materialized_series();
    let sampled = series.sample_n(n, with_replacement, shuffle, seed)?;
    Ok(Self::from(sampled))
}
1385
1386
pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1387
polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1388
if self.len().saturating_sub(offset) == 0 {
1389
return Ok(self.clear());
1390
}
1391
1392
match self {
1393
Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1394
Column::Scalar(s) => {
1395
let total = s.len() - offset;
1396
Ok(s.resize(1 + (total - 1) / n).into())
1397
},
1398
}
1399
}
1400
1401
pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1402
if self.is_empty() {
1403
return Ok(Self::new_scalar(
1404
self.name().clone(),
1405
Scalar::new(self.dtype().clone(), value.into_static()),
1406
n,
1407
));
1408
}
1409
1410
match self {
1411
Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1412
Column::Scalar(s) => {
1413
if s.scalar().as_any_value() == value {
1414
Ok(s.resize(s.len() + n).into())
1415
} else {
1416
s.as_materialized_series()
1417
.extend_constant(value, n)
1418
.map(Column::from)
1419
}
1420
},
1421
}
1422
}
1423
1424
/// Applies the series-level `is_finite` through `try_map_unary_elementwise_to_bool`.
pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
    self.try_map_unary_elementwise_to_bool(|series| series.is_finite())
}
1427
/// Applies the series-level `is_infinite` through `try_map_unary_elementwise_to_bool`.
pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
    self.try_map_unary_elementwise_to_bool(|series| series.is_infinite())
}
1430
/// Applies the series-level `is_nan` through `try_map_unary_elementwise_to_bool`.
pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
    self.try_map_unary_elementwise_to_bool(|series| series.is_nan())
}
1433
/// Applies the series-level `is_not_nan` through `try_map_unary_elementwise_to_bool`.
pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
    self.try_map_unary_elementwise_to_bool(|series| series.is_not_nan())
}
1436
1437
pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1438
where
1439
T: Num + NumCast,
1440
{
1441
// @scalar-opt
1442
self.as_materialized_series()
1443
.wrapping_trunc_div_scalar(rhs)
1444
.into()
1445
}
1446
1447
/// Forwards to `product` of the materialized series.
pub fn product(&self) -> PolarsResult<Scalar> {
    // @scalar-opt
    let series = self.as_materialized_series();
    series.product()
}
1451
1452
/// Forwards to `phys_iter` of the materialized series.
pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
    // @scalar-opt
    let series = self.as_materialized_series();
    series.phys_iter()
}
1456
1457
#[inline]
1458
pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1459
polars_ensure!(index < self.len(), oob = index, self.len());
1460
1461
// SAFETY: Bounds check done just before.
1462
Ok(unsafe { self.get_unchecked(index) })
1463
}
1464
/// # Safety
1465
///
1466
/// Does not perform bounds check on `index`
1467
#[inline(always)]
1468
pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1469
debug_assert!(index < self.len());
1470
1471
match self {
1472
Column::Series(s) => unsafe { s.get_unchecked(index) },
1473
Column::Scalar(s) => s.scalar().as_any_value(),
1474
}
1475
}
1476
1477
/// Forwards to `get_object(index)` of the materialized series.
#[cfg(feature = "object")]
pub fn get_object(
    &self,
    index: usize,
) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
    let series = self.as_materialized_series();
    series.get_object(index)
}
1484
1485
/// Applies `&` between `self` and `rhs` through
/// `try_apply_broadcasting_binary_elementwise`.
pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
    self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left & right)
}
1488
/// Applies `|` between `self` and `rhs` through
/// `try_apply_broadcasting_binary_elementwise`.
pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
    self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left | right)
}
1491
/// Applies `^` between `self` and `rhs` through
/// `try_apply_broadcasting_binary_elementwise`.
pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
    self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left ^ right)
}
1494
1495
pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1496
match (self, other) {
1497
(Column::Series(lhs), Column::Series(rhs)) => {
1498
lhs.take().try_add_owned(rhs.take()).map(Column::from)
1499
},
1500
(lhs, rhs) => lhs + rhs,
1501
}
1502
}
1503
pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1504
match (self, other) {
1505
(Column::Series(lhs), Column::Series(rhs)) => {
1506
lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1507
},
1508
(lhs, rhs) => lhs - rhs,
1509
}
1510
}
1511
pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1512
match (self, other) {
1513
(Column::Series(lhs), Column::Series(rhs)) => {
1514
lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1515
},
1516
(lhs, rhs) => lhs * rhs,
1517
}
1518
}
1519
1520
/// Returns `str_value()` of the element at `index`; fails when `get(index)` fails.
pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
    let value = self.get(index)?;
    Ok(value.str_value())
}
1523
1524
pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1525
match self {
1526
Column::Series(s) => s.min_reduce(),
1527
Column::Scalar(s) => {
1528
// We don't really want to deal with handling the full semantics here so we just
1529
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1530
s.as_single_value_series().min_reduce()
1531
},
1532
}
1533
}
1534
pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1535
match self {
1536
Column::Series(s) => s.max_reduce(),
1537
Column::Scalar(s) => {
1538
// We don't really want to deal with handling the full semantics here so we just
1539
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1540
s.as_single_value_series().max_reduce()
1541
},
1542
}
1543
}
1544
pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1545
match self {
1546
Column::Series(s) => s.median_reduce(),
1547
Column::Scalar(s) => {
1548
// We don't really want to deal with handling the full semantics here so we just
1549
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1550
s.as_single_value_series().median_reduce()
1551
},
1552
}
1553
}
1554
pub fn mean_reduce(&self) -> PolarsResult<Scalar> {
1555
match self {
1556
Column::Series(s) => s.mean_reduce(),
1557
Column::Scalar(s) => {
1558
// We don't really want to deal with handling the full semantics here so we just
1559
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1560
s.as_single_value_series().mean_reduce()
1561
},
1562
}
1563
}
1564
pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1565
match self {
1566
Column::Series(s) => s.std_reduce(ddof),
1567
Column::Scalar(s) => {
1568
// We don't really want to deal with handling the full semantics here so we just
1569
// cast to a small series. This is a tiny bit wasteful, but probably fine.
1570
let n = s.len().min(ddof as usize + 1);
1571
s.as_n_values_series(n).std_reduce(ddof)
1572
},
1573
}
1574
}
1575
pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1576
match self {
1577
Column::Series(s) => s.var_reduce(ddof),
1578
Column::Scalar(s) => {
1579
// We don't really want to deal with handling the full semantics here so we just
1580
// cast to a small series. This is a tiny bit wasteful, but probably fine.
1581
let n = s.len().min(ddof as usize + 1);
1582
s.as_n_values_series(n).var_reduce(ddof)
1583
},
1584
}
1585
}
1586
/// Forwards to `sum_reduce` of the materialized series.
pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
    // @scalar-opt
    let series = self.as_materialized_series();
    series.sum_reduce()
}
1590
pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1591
match self {
1592
Column::Series(s) => s.and_reduce(),
1593
Column::Scalar(s) => {
1594
// We don't really want to deal with handling the full semantics here so we just
1595
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1596
s.as_single_value_series().and_reduce()
1597
},
1598
}
1599
}
1600
pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1601
match self {
1602
Column::Series(s) => s.or_reduce(),
1603
Column::Scalar(s) => {
1604
// We don't really want to deal with handling the full semantics here so we just
1605
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1606
s.as_single_value_series().or_reduce()
1607
},
1608
}
1609
}
1610
pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1611
match self {
1612
Column::Series(s) => s.xor_reduce(),
1613
Column::Scalar(s) => {
1614
// We don't really want to deal with handling the full semantics here so we just
1615
// cast to a single value series. This is a tiny bit wasteful, but probably fine.
1616
//
1617
// We have to deal with the fact that xor is 0 if there is an even number of
1618
// elements and the value if there is an odd number of elements. If there are zero
1619
// elements the result should be `null`.
1620
s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1621
},
1622
}
1623
}
1624
pub fn n_unique(&self) -> PolarsResult<usize> {
1625
match self {
1626
Column::Series(s) => s.n_unique(),
1627
Column::Scalar(s) => s.as_single_value_series().n_unique(),
1628
}
1629
}
1630
1631
/// Forwards to `quantile_reduce(quantile, method)` of the materialized series.
pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
    let series = self.as_materialized_series();
    series.quantile_reduce(quantile, method)
}
1635
1636
pub fn quantiles_reduce(
1637
&self,
1638
quantiles: &[f64],
1639
method: QuantileMethod,
1640
) -> PolarsResult<Scalar> {
1641
self.as_materialized_series()
1642
.quantiles_reduce(quantiles, method)
1643
}
1644
1645
pub(crate) fn estimated_size(&self) -> usize {
1646
// @scalar-opt
1647
self.as_materialized_series().estimated_size()
1648
}
1649
1650
pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1651
match self {
1652
Column::Series(s) => s.sort_with(options).map(Self::from),
1653
Column::Scalar(s) => {
1654
// This makes this function throw the same errors as Series::sort_with
1655
_ = s.as_single_value_series().sort_with(options)?;
1656
1657
Ok(self.clone())
1658
},
1659
}
1660
}
1661
1662
pub fn map_unary_elementwise_to_bool(
1663
&self,
1664
f: impl Fn(&Series) -> BooleanChunked,
1665
) -> BooleanChunked {
1666
self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1667
.unwrap()
1668
}
1669
pub fn try_map_unary_elementwise_to_bool(
1670
&self,
1671
f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1672
) -> PolarsResult<BooleanChunked> {
1673
match self {
1674
Column::Series(s) => f(s),
1675
Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1676
}
1677
}
1678
1679
/// Infallible wrapper around `try_apply_unary_elementwise`: `f` is wrapped in `Ok` and the
/// result is unwrapped.
pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
    let applied = self.try_apply_unary_elementwise(|series| Ok(f(series)));
    applied.unwrap()
}
1682
pub fn try_apply_unary_elementwise(
1683
&self,
1684
f: impl Fn(&Series) -> PolarsResult<Series>,
1685
) -> PolarsResult<Column> {
1686
match self {
1687
Column::Series(s) => f(s).map(Column::from),
1688
Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1689
f(&s.as_single_value_series())?,
1690
s.len(),
1691
)
1692
.into()),
1693
}
1694
}
1695
1696
pub fn apply_broadcasting_binary_elementwise(
1697
&self,
1698
other: &Self,
1699
op: impl Fn(&Series, &Series) -> Series,
1700
) -> PolarsResult<Column> {
1701
self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1702
}
1703
pub fn try_apply_broadcasting_binary_elementwise(
1704
&self,
1705
other: &Self,
1706
op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1707
) -> PolarsResult<Column> {
1708
fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1709
match (a.len(), b.len()) {
1710
// broadcasting
1711
(1, o) | (o, 1) => Ok(o),
1712
// equal
1713
(a, b) if a == b => Ok(a),
1714
// unequal
1715
(a, b) => {
1716
polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1717
},
1718
}
1719
}
1720
1721
// Here we rely on the underlying broadcast operations.
1722
let length = output_length(self, other)?;
1723
match (self, other) {
1724
(Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1725
(Column::Series(lhs), Column::Scalar(rhs)) => {
1726
op(lhs, &rhs.as_single_value_series()).map(Column::from)
1727
},
1728
(Column::Scalar(lhs), Column::Series(rhs)) => {
1729
op(&lhs.as_single_value_series(), rhs).map(Column::from)
1730
},
1731
(Column::Scalar(lhs), Column::Scalar(rhs)) => {
1732
let lhs = lhs.as_single_value_series();
1733
let rhs = rhs.as_single_value_series();
1734
1735
Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1736
},
1737
}
1738
}
1739
1740
pub fn apply_binary_elementwise(
1741
&self,
1742
other: &Self,
1743
f: impl Fn(&Series, &Series) -> Series,
1744
f_lb: impl Fn(&Scalar, &Series) -> Series,
1745
f_rb: impl Fn(&Series, &Scalar) -> Series,
1746
) -> Column {
1747
self.try_apply_binary_elementwise(
1748
other,
1749
|lhs, rhs| Ok(f(lhs, rhs)),
1750
|lhs, rhs| Ok(f_lb(lhs, rhs)),
1751
|lhs, rhs| Ok(f_rb(lhs, rhs)),
1752
)
1753
.unwrap()
1754
}
1755
pub fn try_apply_binary_elementwise(
1756
&self,
1757
other: &Self,
1758
f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1759
f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1760
f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1761
) -> PolarsResult<Column> {
1762
debug_assert_eq!(self.len(), other.len());
1763
1764
match (self, other) {
1765
(Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1766
(Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1767
(Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1768
(Column::Scalar(lhs), Column::Scalar(rhs)) => {
1769
let lhs = lhs.as_single_value_series();
1770
let rhs = rhs.as_single_value_series();
1771
1772
Ok(
1773
ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1774
.into_column(),
1775
)
1776
},
1777
}
1778
}
1779
1780
/// Returns an approximate count of the distinct values in this column.
///
/// A scalar column repeats one value, so the answer is `1`, or `0` when the column is
/// empty. The series-level implementation is still invoked on the single-value series so
/// that its error is reported.
#[cfg(feature = "approx_unique")]
pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
    match self {
        Column::Series(s) => s.approx_n_unique(),
        Column::Scalar(s) => {
            // @NOTE: We do this for the error handling.
            s.as_single_value_series().approx_n_unique()?;
            // A zero-length column contains no values at all.
            Ok(if s.is_empty() { 0 } else { 1 })
        },
    }
}
1791
1792
pub fn n_chunks(&self) -> usize {
1793
match self {
1794
Column::Series(s) => s.n_chunks(),
1795
Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1796
}
1797
}
1798
1799
#[expect(clippy::wrong_self_convention)]
1800
pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1801
// @scalar-opt
1802
self.as_materialized_series().into_total_ord_inner()
1803
}
1804
#[expect(unused, clippy::wrong_self_convention)]
1805
pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1806
// @scalar-opt
1807
self.as_materialized_series().into_total_eq_inner()
1808
}
1809
1810
/// Consumes the column and converts chunk 0 of its series to an arrow array at
/// `compat_level`, rechunking first when the series has more than one chunk.
pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
    let series = self.take_materialized_series();

    // Rechunk to one chunk if necessary
    let series = if series.n_chunks() > 1 {
        series.rechunk()
    } else {
        series
    };

    series.to_arrow(0, compat_level)
}
1818
1819
pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1820
self.as_materialized_series()
1821
.trim_lists_to_normalized_offsets()
1822
.map(Column::from)
1823
}
1824
1825
pub fn propagate_nulls(&self) -> Option<Column> {
1826
self.as_materialized_series()
1827
.propagate_nulls()
1828
.map(Column::from)
1829
}
1830
1831
/// Calls `deposit(validity)` on the materialized series and converts the result into a
/// [`Column`].
pub fn deposit(&self, validity: &Bitmap) -> Column {
    let series = self.as_materialized_series();
    series.deposit(validity).into_column()
}
1836
1837
/// Forwards to `rechunk_validity` of the materialized series.
pub fn rechunk_validity(&self) -> Option<Bitmap> {
    // @scalar-opt
    let series = self.as_materialized_series();
    series.rechunk_validity()
}
1841
1842
/// Forwards to `unique_id` of the materialized series.
pub fn unique_id(&self) -> PolarsResult<(IdxSize, Vec<IdxSize>)> {
    let series = self.as_materialized_series();
    series.unique_id()
}
1845
}
1846
1847
impl Default for Column {
1848
fn default() -> Self {
1849
Self::new_scalar(
1850
PlSmallStr::EMPTY,
1851
Scalar::new(DataType::Int64, AnyValue::Null),
1852
0,
1853
)
1854
}
1855
}
1856
1857
impl PartialEq for Column {
1858
fn eq(&self, other: &Self) -> bool {
1859
// @scalar-opt
1860
self.as_materialized_series()
1861
.eq(other.as_materialized_series())
1862
}
1863
}
1864
1865
impl From<Series> for Column {
1866
#[inline]
1867
fn from(series: Series) -> Self {
1868
// We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1869
// future operations to be faster.
1870
if series.len() == 1 {
1871
return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1872
}
1873
1874
Self::Series(SeriesColumn::new(series))
1875
}
1876
}
1877
1878
impl<T: IntoSeries> IntoColumn for T {
1879
#[inline]
1880
fn into_column(self) -> Column {
1881
self.into_series().into()
1882
}
1883
}
1884
1885
impl IntoColumn for Column {
    /// Identity conversion: a [`Column`] is returned unchanged.
    #[inline(always)]
    fn into_column(self) -> Column {
        let column = self;
        column
    }
}
1891
1892
/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
1893
/// initialized without implementing From<Column> for Series.
1894
///
1895
/// Those casts should be explicit.
1896
#[derive(Clone)]
1897
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
1898
#[cfg_attr(feature = "serde", serde(into = "Series"))]
1899
struct _SerdeSeries(Series);
1900
1901
impl From<Column> for _SerdeSeries {
1902
#[inline]
1903
fn from(value: Column) -> Self {
1904
Self(value.take_materialized_series())
1905
}
1906
}
1907
1908
impl From<_SerdeSeries> for Series {
1909
#[inline]
1910
fn from(value: _SerdeSeries) -> Self {
1911
value.0
1912
}
1913
}
1914
1915