CoCalc -- mod.rs

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/column/mod.rs
⁶⁹⁴⁰ views
1
use std::borrow::Cow;
2

3
use arrow::bitmap::BitmapBuilder;
4
use arrow::trusted_len::TrustMyLength;
5
use num_traits::{Num, NumCast};
6
use polars_compute::rolling::QuantileMethod;
7
use polars_error::PolarsResult;
8
use polars_utils::aliases::PlSeedableRandomStateQuality;
9
use polars_utils::index::check_bounds;
10
use polars_utils::pl_str::PlSmallStr;
11
pub use scalar::ScalarColumn;
12

13
use self::compare_inner::{TotalEqInner, TotalOrdInner};
14
use self::gather::check_bounds_ca;
15
use self::partitioned::PartitionedColumn;
16
use self::series::SeriesColumn;
17
use crate::chunked_array::cast::CastOptions;
18
use crate::chunked_array::flags::StatisticsFlags;
19
use crate::datatypes::ReshapeDimension;
20
use crate::prelude::*;
21
use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
22
use crate::utils::{Container, slice_offsets};
23
use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};
24

25
mod arithmetic;
26
mod compare;
27
mod partitioned;
28
mod scalar;
29
mod series;
30

31
/// A column within a [`DataFrame`].
32
///
33
/// This is lazily initialized to a [`Series`] with methods like
34
/// [`as_materialized_series`][Column::as_materialized_series] and
35
/// [`take_materialized_series`][Column::take_materialized_series].
36
///
37
/// Currently, there are two ways to represent a [`Column`].
38
/// 1. A [`Series`] of values
39
/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`]
40
#[derive(Debug, Clone)]
41
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
42
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
43
pub enum Column {
44
    Series(SeriesColumn),
45
    Partitioned(PartitionedColumn),
46
    Scalar(ScalarColumn),
47
}
48

49
/// Convert `Self` into a [`Column`]
50
pub trait IntoColumn: Sized {
51
    fn into_column(self) -> Column;
52
}
53

54
impl Column {
55
    #[inline]
56
    #[track_caller]
57
    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
58
    where
59
        Phantom: ?Sized,
60
        Series: NamedFrom<T, Phantom>,
61
    {
62
        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
63
    }
64

65
    #[inline]
66
    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
67
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
68
    }
69

70
    #[inline]
71
    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
72
        Self::Scalar(ScalarColumn::new(name, scalar, length))
73
    }
74

75
    #[inline]
76
    pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
77
        Self::Scalar(ScalarColumn::new(name, scalar, length))
78
    }
79

80
    pub fn new_row_index(name: PlSmallStr, offset: IdxSize, length: usize) -> PolarsResult<Column> {
81
        let Ok(length) = IdxSize::try_from(length) else {
82
            polars_bail!(
83
                ComputeError:
84
                "row index length {} overflows IdxSize::MAX ({})",
85
                length,
86
                IdxSize::MAX,
87
            )
88
        };
89

90
        if offset.checked_add(length).is_none() {
91
            polars_bail!(
92
                ComputeError:
93
                "row index with offset {} overflows on dataframe with height {}",
94
                offset, length
95
            )
96
        }
97

98
        let range = offset..offset + length;
99

100
        let mut ca = IdxCa::from_vec(name, range.collect());
101
        ca.set_sorted_flag(IsSorted::Ascending);
102
        let col = ca.into_series().into();
103

104
        Ok(col)
105
    }
106

107
    // # Materialize
108
    /// Get a reference to a [`Series`] for this [`Column`]
109
    ///
110
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
111
    #[inline]
112
    pub fn as_materialized_series(&self) -> &Series {
113
        match self {
114
            Column::Series(s) => s,
115
            Column::Partitioned(s) => s.as_materialized_series(),
116
            Column::Scalar(s) => s.as_materialized_series(),
117
        }
118
    }
119

120
    /// If the memory repr of this Column is a scalar, a unit-length Series will
121
    /// be returned.
122
    #[inline]
123
    pub fn as_materialized_series_maintain_scalar(&self) -> Series {
124
        match self {
125
            Column::Scalar(s) => s.as_single_value_series(),
126
            v => v.as_materialized_series().clone(),
127
        }
128
    }
129

130
    /// Returns the backing `Series` for the values of this column.
131
    ///
132
    /// * For `Column::Series` columns, simply returns the inner `Series`.
133
    /// * For `Column::Partitioned` columns, returns the series representing the values.
134
    /// * For `Column::Scalar` columns, returns an empty or unit length series.
135
    ///
136
    /// # Note
137
    /// This method is safe to use. However, care must be taken when operating on the returned
138
    /// `Series` to ensure result correctness. E.g. It is suitable to perform elementwise operations
139
    /// on it, however e.g. aggregations will return unspecified results.
140
    pub fn _get_backing_series(&self) -> Series {
141
        match self {
142
            Column::Series(s) => (**s).clone(),
143
            Column::Partitioned(s) => s.partitions().clone(),
144
            Column::Scalar(s) => s.as_single_value_series(),
145
        }
146
    }
147

148
    /// Constructs a new `Column` of the same variant as `self` from a backing `Series` representing
149
    /// the values.
150
    ///
151
    /// # Panics
152
    /// Panics if:
153
    /// * `self` is `Column::Series` and the length of `new_s` does not match that of `self`.
154
    /// * `self` is `Column::Partitioned` and the length of `new_s` does not match that of the existing partitions.
155
    /// * `self` is `Column::Scalar` and if either:
156
    ///   * `self` is not empty and `new_s` is not of unit length.
157
    ///   * `self` is empty and `new_s` is not empty.
158
    pub fn _to_new_from_backing(&self, new_s: Series) -> Self {
159
        match self {
160
            Column::Series(s) => {
161
                assert_eq!(new_s.len(), s.len());
162
                Column::Series(SeriesColumn::new(new_s))
163
            },
164
            Column::Partitioned(s) => {
165
                assert_eq!(new_s.len(), s.partitions().len());
166
                unsafe {
167
                    Column::Partitioned(PartitionedColumn::new_unchecked(
168
                        new_s.name().clone(),
169
                        new_s,
170
                        s.partition_ends_ref().clone(),
171
                    ))
172
                }
173
            },
174
            Column::Scalar(s) => {
175
                assert_eq!(new_s.len(), s.as_single_value_series().len());
176
                Column::Scalar(ScalarColumn::from_single_value_series(new_s, self.len()))
177
            },
178
        }
179
    }
180

181
    /// Turn [`Column`] into a [`Column::Series`].
182
    ///
183
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
184
    #[inline]
185
    pub fn into_materialized_series(&mut self) -> &mut Series {
186
        match self {
187
            Column::Series(s) => s,
188
            Column::Partitioned(s) => {
189
                let series = std::mem::replace(
190
                    s,
191
                    PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
192
                )
193
                .take_materialized_series();
194
                *self = Column::Series(series.into());
195
                let Column::Series(s) = self else {
196
                    unreachable!();
197
                };
198
                s
199
            },
200
            Column::Scalar(s) => {
201
                let series = std::mem::replace(
202
                    s,
203
                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
204
                )
205
                .take_materialized_series();
206
                *self = Column::Series(series.into());
207
                let Column::Series(s) = self else {
208
                    unreachable!();
209
                };
210
                s
211
            },
212
        }
213
    }
214
    /// Take [`Series`] from a [`Column`]
215
    ///
216
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
217
    #[inline]
218
    pub fn take_materialized_series(self) -> Series {
219
        match self {
220
            Column::Series(s) => s.take(),
221
            Column::Partitioned(s) => s.take_materialized_series(),
222
            Column::Scalar(s) => s.take_materialized_series(),
223
        }
224
    }
225

226
    #[inline]
227
    pub fn dtype(&self) -> &DataType {
228
        match self {
229
            Column::Series(s) => s.dtype(),
230
            Column::Partitioned(s) => s.dtype(),
231
            Column::Scalar(s) => s.dtype(),
232
        }
233
    }
234

235
    #[inline]
236
    pub fn field(&self) -> Cow<'_, Field> {
237
        match self {
238
            Column::Series(s) => s.field(),
239
            Column::Partitioned(s) => s.field(),
240
            Column::Scalar(s) => match s.lazy_as_materialized_series() {
241
                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
242
                Some(s) => s.field(),
243
            },
244
        }
245
    }
246

247
    #[inline]
248
    pub fn name(&self) -> &PlSmallStr {
249
        match self {
250
            Column::Series(s) => s.name(),
251
            Column::Partitioned(s) => s.name(),
252
            Column::Scalar(s) => s.name(),
253
        }
254
    }
255

256
    #[inline]
257
    pub fn len(&self) -> usize {
258
        match self {
259
            Column::Series(s) => s.len(),
260
            Column::Partitioned(s) => s.len(),
261
            Column::Scalar(s) => s.len(),
262
        }
263
    }
264

265
    #[inline]
266
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
267
        self.rename(name);
268
        self
269
    }
270

271
    #[inline]
272
    pub fn rename(&mut self, name: PlSmallStr) {
273
        match self {
274
            Column::Series(s) => _ = s.rename(name),
275
            Column::Partitioned(s) => _ = s.rename(name),
276
            Column::Scalar(s) => _ = s.rename(name),
277
        }
278
    }
279

280
    // # Downcasting
281
    #[inline]
282
    pub fn as_series(&self) -> Option<&Series> {
283
        match self {
284
            Column::Series(s) => Some(s),
285
            _ => None,
286
        }
287
    }
288
    #[inline]
289
    pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {
290
        match self {
291
            Column::Partitioned(s) => Some(s),
292
            _ => None,
293
        }
294
    }
295
    #[inline]
296
    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
297
        match self {
298
            Column::Scalar(s) => Some(s),
299
            _ => None,
300
        }
301
    }
302
    #[inline]
303
    pub fn as_scalar_column_mut(&mut self) -> Option<&mut ScalarColumn> {
304
        match self {
305
            Column::Scalar(s) => Some(s),
306
            _ => None,
307
        }
308
    }
309

310
    // # Try to Chunked Arrays
311
    /// Forwards to `try_bool` on the materialized [`Series`].
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        let series = self.as_materialized_series();
        series.try_bool()
    }
314
    /// Forwards to `try_i8` on the materialized [`Series`].
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        let series = self.as_materialized_series();
        series.try_i8()
    }
317
    /// Forwards to `try_i16` on the materialized [`Series`].
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        let series = self.as_materialized_series();
        series.try_i16()
    }
320
    /// Forwards to `try_i32` on the materialized [`Series`].
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        let series = self.as_materialized_series();
        series.try_i32()
    }
323
    /// Forwards to `try_i64` on the materialized [`Series`].
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        let series = self.as_materialized_series();
        series.try_i64()
    }
326
    /// Forwards to `try_u8` on the materialized [`Series`].
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        let series = self.as_materialized_series();
        series.try_u8()
    }
329
    /// Forwards to `try_u16` on the materialized [`Series`].
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        let series = self.as_materialized_series();
        series.try_u16()
    }
332
    /// Forwards to `try_u32` on the materialized [`Series`].
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        let series = self.as_materialized_series();
        series.try_u32()
    }
335
    /// Forwards to `try_u64` on the materialized [`Series`].
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        let series = self.as_materialized_series();
        series.try_u64()
    }
338
    /// Forwards to `try_f32` on the materialized [`Series`].
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        let series = self.as_materialized_series();
        series.try_f32()
    }
341
    /// Forwards to `try_f64` on the materialized [`Series`].
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        let series = self.as_materialized_series();
        series.try_f64()
    }
344
    /// Forwards to `try_str` on the materialized [`Series`].
    pub fn try_str(&self) -> Option<&StringChunked> {
        let series = self.as_materialized_series();
        series.try_str()
    }
347
    /// Forwards to `try_list` on the materialized [`Series`].
    pub fn try_list(&self) -> Option<&ListChunked> {
        let series = self.as_materialized_series();
        series.try_list()
    }
350
    /// Forwards to `try_binary` on the materialized [`Series`].
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        let series = self.as_materialized_series();
        series.try_binary()
    }
353
    /// Forwards to `try_idx` on the materialized [`Series`].
    pub fn try_idx(&self) -> Option<&IdxCa> {
        let series = self.as_materialized_series();
        series.try_idx()
    }
356
    /// Forwards to `try_binary_offset` on the materialized [`Series`].
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        let series = self.as_materialized_series();
        series.try_binary_offset()
    }
359
    /// Forwards to `try_datetime` on the materialized [`Series`].
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        let series = self.as_materialized_series();
        series.try_datetime()
    }
363
    /// Forwards to `try_struct` on the materialized [`Series`].
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        let series = self.as_materialized_series();
        series.try_struct()
    }
367
    /// Forwards to `try_decimal` on the materialized [`Series`].
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        let series = self.as_materialized_series();
        series.try_decimal()
    }
371
    /// Forwards to `try_array` on the materialized [`Series`].
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        let series = self.as_materialized_series();
        series.try_array()
    }
375
    /// Forwards to `try_cat::<T>` on the materialized [`Series`].
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat<T: PolarsCategoricalType>(&self) -> Option<&CategoricalChunked<T>> {
        let series = self.as_materialized_series();
        series.try_cat::<T>()
    }
379
    /// Forwards to `try_cat8` on the materialized [`Series`].
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat8(&self) -> Option<&Categorical8Chunked> {
        let series = self.as_materialized_series();
        series.try_cat8()
    }
383
    /// Forwards to `try_cat16` on the materialized [`Series`].
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat16(&self) -> Option<&Categorical16Chunked> {
        let series = self.as_materialized_series();
        series.try_cat16()
    }
387
    /// Forwards to `try_cat32` on the materialized [`Series`].
    #[cfg(feature = "dtype-categorical")]
    pub fn try_cat32(&self) -> Option<&Categorical32Chunked> {
        let series = self.as_materialized_series();
        series.try_cat32()
    }
391
    /// Forwards to `try_date` on the materialized [`Series`].
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        let series = self.as_materialized_series();
        series.try_date()
    }
395
    /// Forwards to `try_duration` on the materialized [`Series`].
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        let series = self.as_materialized_series();
        series.try_duration()
    }
399

400
    // # To Chunked Arrays
401
    /// Forwards to `bool` on the materialized [`Series`], propagating its error.
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        let series = self.as_materialized_series();
        series.bool()
    }
404
    /// Forwards to `i8` on the materialized [`Series`], propagating its error.
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        let series = self.as_materialized_series();
        series.i8()
    }
407
    /// Forwards to `i16` on the materialized [`Series`], propagating its error.
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        let series = self.as_materialized_series();
        series.i16()
    }
410
    /// Forwards to `i32` on the materialized [`Series`], propagating its error.
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        let series = self.as_materialized_series();
        series.i32()
    }
413
    /// Forwards to `i64` on the materialized [`Series`], propagating its error.
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        let series = self.as_materialized_series();
        series.i64()
    }
416
    /// Forwards to `i128` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        let series = self.as_materialized_series();
        series.i128()
    }
420
    /// Forwards to `u8` on the materialized [`Series`], propagating its error.
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        let series = self.as_materialized_series();
        series.u8()
    }
423
    /// Forwards to `u16` on the materialized [`Series`], propagating its error.
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        let series = self.as_materialized_series();
        series.u16()
    }
426
    /// Forwards to `u32` on the materialized [`Series`], propagating its error.
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        let series = self.as_materialized_series();
        series.u32()
    }
429
    /// Forwards to `u64` on the materialized [`Series`], propagating its error.
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        let series = self.as_materialized_series();
        series.u64()
    }
432
    /// Forwards to `f32` on the materialized [`Series`], propagating its error.
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        let series = self.as_materialized_series();
        series.f32()
    }
435
    /// Forwards to `f64` on the materialized [`Series`], propagating its error.
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        let series = self.as_materialized_series();
        series.f64()
    }
438
    /// Forwards to `str` on the materialized [`Series`], propagating its error.
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        let series = self.as_materialized_series();
        series.str()
    }
441
    /// Forwards to `list` on the materialized [`Series`], propagating its error.
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        let series = self.as_materialized_series();
        series.list()
    }
444
    /// Forwards to `binary` on the materialized [`Series`], propagating its error.
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        let series = self.as_materialized_series();
        series.binary()
    }
447
    /// Forwards to `idx` on the materialized [`Series`], propagating its error.
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        let series = self.as_materialized_series();
        series.idx()
    }
450
    /// Forwards to `binary_offset` on the materialized [`Series`], propagating its error.
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        let series = self.as_materialized_series();
        series.binary_offset()
    }
453
    /// Forwards to `datetime` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        let series = self.as_materialized_series();
        series.datetime()
    }
457
    /// Forwards to `struct_` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        let series = self.as_materialized_series();
        series.struct_()
    }
461
    /// Forwards to `decimal` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        let series = self.as_materialized_series();
        series.decimal()
    }
465
    /// Forwards to `array` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        let series = self.as_materialized_series();
        series.array()
    }
469
    /// Forwards to `cat::<T>` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat<T: PolarsCategoricalType>(&self) -> PolarsResult<&CategoricalChunked<T>> {
        let series = self.as_materialized_series();
        series.cat::<T>()
    }
473
    /// Forwards to `cat8` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat8(&self) -> PolarsResult<&Categorical8Chunked> {
        let series = self.as_materialized_series();
        series.cat8()
    }
477
    /// Forwards to `cat16` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat16(&self) -> PolarsResult<&Categorical16Chunked> {
        let series = self.as_materialized_series();
        series.cat16()
    }
481
    /// Forwards to `cat32` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-categorical")]
    pub fn cat32(&self) -> PolarsResult<&Categorical32Chunked> {
        let series = self.as_materialized_series();
        series.cat32()
    }
485
    /// Forwards to `date` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        let series = self.as_materialized_series();
        series.date()
    }
489
    /// Forwards to `duration` on the materialized [`Series`], propagating its error.
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        let series = self.as_materialized_series();
        series.duration()
    }
493

494
    // # Casting
495
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
496
        match self {
497
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
498
            Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),
499
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
500
        }
501
    }
502
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
503
        match self {
504
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
505
            Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),
506
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
507
        }
508
    }
509
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
510
        match self {
511
            Column::Series(s) => s.cast(dtype).map(Column::from),
512
            Column::Partitioned(s) => s.cast(dtype).map(Column::from),
513
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
514
        }
515
    }
516
    /// # Safety
517
    ///
518
    /// This can lead to invalid memory access in downstream code.
519
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
520
        match self {
521
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
522
            Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
523
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
524
        }
525
    }
526

527
    pub fn clear(&self) -> Self {
528
        match self {
529
            Column::Series(s) => s.clear().into(),
530
            Column::Partitioned(s) => s.clear().into(),
531
            Column::Scalar(s) => s.resize(0).into(),
532
        }
533
    }
534

535
    #[inline]
536
    pub fn shrink_to_fit(&mut self) {
537
        match self {
538
            Column::Series(s) => s.shrink_to_fit(),
539
            // @partition-opt
540
            Column::Partitioned(_) => {},
541
            Column::Scalar(_) => {},
542
        }
543
    }
544

545
    #[inline]
546
    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
547
        if index >= self.len() {
548
            return Self::full_null(self.name().clone(), length, self.dtype());
549
        }
550

551
        match self {
552
            Column::Series(s) => {
553
                // SAFETY: Bounds check done before.
554
                let av = unsafe { s.get_unchecked(index) };
555
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
556
                Self::new_scalar(self.name().clone(), scalar, length)
557
            },
558
            Column::Partitioned(s) => {
559
                // SAFETY: Bounds check done before.
560
                let av = unsafe { s.get_unchecked(index) };
561
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
562
                Self::new_scalar(self.name().clone(), scalar, length)
563
            },
564
            Column::Scalar(s) => s.resize(length).into(),
565
        }
566
    }
567

568
    #[inline]
569
    pub fn has_nulls(&self) -> bool {
570
        match self {
571
            Self::Series(s) => s.has_nulls(),
572
            // @partition-opt
573
            Self::Partitioned(s) => s.as_materialized_series().has_nulls(),
574
            Self::Scalar(s) => s.has_nulls(),
575
        }
576
    }
577

578
    #[inline]
579
    pub fn is_null(&self) -> BooleanChunked {
580
        match self {
581
            Self::Series(s) => s.is_null(),
582
            // @partition-opt
583
            Self::Partitioned(s) => s.as_materialized_series().is_null(),
584
            Self::Scalar(s) => {
585
                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
586
            },
587
        }
588
    }
589
    #[inline]
590
    pub fn is_not_null(&self) -> BooleanChunked {
591
        match self {
592
            Self::Series(s) => s.is_not_null(),
593
            // @partition-opt
594
            Self::Partitioned(s) => s.as_materialized_series().is_not_null(),
595
            Self::Scalar(s) => {
596
                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
597
            },
598
        }
599
    }
600

601
    /// Converts the materialized series with `to_physical_repr` and wraps the owned result in a
    /// [`Column`].
    pub fn to_physical_repr(&self) -> Column {
        // @scalar-opt
        let series = self.as_materialized_series();
        let physical = series.to_physical_repr().into_owned();
        physical.into()
    }
608
    /// # Safety
609
    ///
610
    /// This can lead to invalid memory access in downstream code.
611
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
612
        // @scalar-opt
613
        self.as_materialized_series()
614
            .from_physical_unchecked(dtype)
615
            .map(Column::from)
616
    }
617

618
    /// Returns the first `length` rows (default [`HEAD_DEFAULT_LENGTH`]), capped at the column
    /// length.
    pub fn head(&self, length: Option<usize>) -> Column {
        let requested = length.unwrap_or(HEAD_DEFAULT_LENGTH);
        let count = requested.min(self.len());
        self.slice(0, count)
    }
623
    /// Returns the last `length` rows (default [`TAIL_DEFAULT_LENGTH`]), capped at the column
    /// length.
    pub fn tail(&self, length: Option<usize>) -> Column {
        let requested = length.unwrap_or(TAIL_DEFAULT_LENGTH);
        let count = requested.min(self.len());
        debug_assert!(count <= i64::MAX as usize);
        // A negative offset is passed so that the slice covers the final `count` rows.
        let start = -(count as i64);
        self.slice(start, count)
    }
629
    pub fn slice(&self, offset: i64, length: usize) -> Column {
630
        match self {
631
            Column::Series(s) => s.slice(offset, length).into(),
632
            // @partition-opt
633
            Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),
634
            Column::Scalar(s) => {
635
                let (_, length) = slice_offsets(offset, length, s.len());
636
                s.resize(length).into()
637
            },
638
        }
639
    }
640

641
    /// Splits the materialized series at `offset` and wraps both halves as columns.
    pub fn split_at(&self, offset: i64) -> (Column, Column) {
        // @scalar-opt
        let series = self.as_materialized_series();
        let (left, right) = series.split_at(offset);
        (left.into(), right.into())
    }
646

647
    #[inline]
648
    pub fn null_count(&self) -> usize {
649
        match self {
650
            Self::Series(s) => s.null_count(),
651
            Self::Partitioned(s) => s.null_count(),
652
            Self::Scalar(s) if s.scalar().is_null() => s.len(),
653
            Self::Scalar(_) => 0,
654
        }
655
    }
656

657
    /// Gathers the rows at `indices` after validating them against the column length.
    ///
    /// # Errors
    /// Propagates the error from `check_bounds_ca` when the bounds check fails.
    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
        let bound = self.len() as IdxSize;
        check_bounds_ca(indices, bound)?;
        // SAFETY: the indices passed the bounds check above.
        let gathered = unsafe { self.take_unchecked(indices) };
        Ok(gathered)
    }
661
    /// Gathers the rows at `indices` after validating them against the column length.
    ///
    /// # Errors
    /// Propagates the error from `check_bounds` when the bounds check fails.
    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
        let bound = self.len() as IdxSize;
        check_bounds(indices, bound)?;
        // SAFETY: the indices passed the bounds check above.
        let gathered = unsafe { self.take_slice_unchecked(indices) };
        Ok(gathered)
    }
665
    /// # Safety
666
    ///
667
    /// No bounds on the indexes are performed.
668
    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
669
        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());
670

671
        match self {
672
            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
673
            Self::Partitioned(s) => {
674
                let s = s.as_materialized_series();
675
                unsafe { s.take_unchecked(indices) }.into()
676
            },
677
            Self::Scalar(s) => {
678
                let idxs_length = indices.len();
679
                let idxs_null_count = indices.null_count();
680

681
                let scalar = ScalarColumn::from_single_value_series(
682
                    s.as_single_value_series().take_unchecked(&IdxCa::new(
683
                        indices.name().clone(),
684
                        &[0][..s.len().min(1)],
685
                    )),
686
                    idxs_length,
687
                );
688

689
                // We need to make sure that null values in `idx` become null values in the result
690
                if idxs_null_count == 0 || scalar.has_nulls() {
691
                    scalar.into_column()
692
                } else if idxs_null_count == idxs_length {
693
                    scalar.into_nulls().into_column()
694
                } else {
695
                    let validity = indices.rechunk_validity();
696
                    let series = scalar.take_materialized_series();
697
                    let name = series.name().clone();
698
                    let dtype = series.dtype().clone();
699
                    let mut chunks = series.into_chunks();
700
                    assert_eq!(chunks.len(), 1);
701
                    chunks[0] = chunks[0].with_validity(validity);
702
                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
703
                        .into_column()
704
                }
705
            },
706
        }
707
    }
708
    /// # Safety
709
    ///
710
    /// No bounds on the indexes are performed.
711
    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
712
        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());
713

714
        match self {
715
            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
716
            Self::Partitioned(s) => {
717
                let s = s.as_materialized_series();
718
                unsafe { s.take_slice_unchecked(indices) }.into()
719
            },
720
            Self::Scalar(s) => ScalarColumn::from_single_value_series(
721
                s.as_single_value_series()
722
                    .take_slice_unchecked(&[0][..s.len().min(1)]),
723
                indices.len(),
724
            )
725
            .into(),
726
        }
727
    }
728

729
    /// Shared aggregation path for aggregations that map a non-missing scalar to itself.
    ///
    /// Series-backed and partitioned columns simply run `series_agg`. A non-empty scalar column
    /// is kept scalar where possible: the aggregation is probed on a single value to learn the
    /// output dtype and whether it is defined, and only empty groups force materialization so
    /// that they can be marked null.
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        let scalar = match self {
            Column::Series(inner) => return series_agg(inner, groups).into_column(),
            // @partition-opt
            Column::Partitioned(inner) => {
                return series_agg(inner.as_materialized_series(), groups).into_column();
            },
            Column::Scalar(inner) => inner,
        };

        if scalar.is_empty() {
            return series_agg(scalar.as_materialized_series(), groups).into_column();
        }

        // Probe the aggregation on a single value to learn:
        // 1. the output datatype of the aggregation
        // 2. whether this aggregation is even defined
        // @NOTE: this group is always valid since the scalar column is non-empty.
        let unit_group = GroupsType::Slice {
            groups: vec![[0, 1]],
            rolling: false,
        };
        let probe = series_agg(&scalar.as_single_value_series(), &unit_group);

        // An undefined aggregation yields nulls for every group.
        if probe.has_nulls() {
            let null_scalar = Scalar::new(probe.dtype().clone(), AnyValue::Null);
            return Self::new_scalar(probe.name().clone(), null_scalar, groups.len());
        }

        let resized = scalar.resize(groups.len());
        // The aggregation might change the type (e.g. mean changes int -> float), so the
        // scalar is cast to the output type when the two differ.
        let broadcast = if probe.dtype() != scalar.dtype() {
            resized.cast(probe.dtype()).unwrap()
        } else {
            resized
        };

        let first_empty = match groups.iter().position(|group| group.is_empty()) {
            Some(idx) => idx,
            // Fast path: no empty groups, so the scalar stays intact.
            None => return broadcast.into_column(),
        };

        // Every empty group produces a *missing* (`null`) value; all groups before the first
        // empty one are known to be valid.
        let remaining = groups.len() - first_empty;
        let mut validity = BitmapBuilder::with_capacity(groups.len());
        validity.extend_constant(first_empty, true);
        let tail_flags = groups
            .iter()
            .skip(first_empty)
            .map(|group| !group.is_empty());
        // SAFETY: We trust the length of this iterator.
        let tail_flags = unsafe { TrustMyLength::new(tail_flags, remaining) };
        validity.extend_trusted_len_iter(tail_flags);

        let mut materialized = broadcast.take_materialized_series().rechunk();
        // SAFETY: We perform a compute_len afterwards.
        let chunks = unsafe { materialized.chunks_mut() };
        chunks[0] = chunks[0].with_validity(validity.into_opt_validity());
        materialized.compute_len();

        materialized.into_column()
    }
802

803
    /// Aggregates every group in `groups` to its minimum.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_min`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_min(grouping) }
        })
    }
810

811
    /// Aggregates every group in `groups` to its maximum.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_max`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_max(grouping) }
        })
    }
818

819
    /// Aggregates every group in `groups` to its mean.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_mean`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_mean(grouping) }
        })
    }
826

827
    /// Aggregates every group in `groups` to its sum.
    ///
    /// Always operates on the materialized series; there is no scalar fast path yet.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let summed = unsafe { series.agg_sum(groups) };
        summed.into()
    }
835

836
    /// Aggregates every group in `groups` to its first value.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_first`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_first(grouping) }
        })
    }
843

844
    /// Aggregates every group in `groups` to its last value.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_last`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_last(grouping) }
        })
    }
851

852
    /// Aggregates every group in `groups` with the `agg_n_unique` aggregation of the
    /// materialized series.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let counts = unsafe { series.agg_n_unique(groups) };
        counts.into()
    }
860

861
    /// Aggregates every group in `groups` to the requested `quantile`, computed with
    /// `method`.
    ///
    /// Always operates on the materialized series; there is no scalar fast path yet.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_quantile(
        &self,
        groups: &GroupsType,
        quantile: f64,
        method: QuantileMethod,
    ) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let quantiles = unsafe { series.agg_quantile(groups, quantile, method) };
        quantiles.into()
    }
879

880
    /// Aggregates every group in `groups` to its median.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the `agg_median`
    /// aggregation as a callback.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
            unsafe { series.agg_median(grouping) }
        })
    }
887

888
    /// Aggregates every group in `groups` to its variance, forwarding `ddof` to the
    /// series-level aggregation.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let variances = unsafe { series.agg_var(groups, ddof) };
        variances.into()
    }
896

897
    /// Aggregates every group in `groups` to its standard deviation, forwarding `ddof` to
    /// the series-level aggregation.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let deviations = unsafe { series.agg_std(groups, ddof) };
        deviations.into()
    }
905

906
    /// Aggregates every group in `groups` with the `agg_list` aggregation of the
    /// materialized series.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; `groups` must be correct for this column.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: validity of the groups is the caller's obligation (see `# Safety`).
        let lists = unsafe { series.agg_list(groups) };
        lists.into()
    }
914

915
    /// Aggregates every group in `groups` with the `agg_valid_count` aggregation of the
    /// materialized series (presumably the number of non-null values per group).
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    ///
    /// NOTE(review): this function is not declared `unsafe` even though it forwards `groups`
    /// to an unchecked aggregation; callers are still expected to pass valid groups.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: relies on the caller passing groups that are valid for this column.
        let counts = unsafe { series.agg_valid_count(groups) };
        counts.into()
    }
924

925
    /// Aggregates every group in `groups` with the bitwise `agg_and` aggregation.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the aggregation as
    /// a callback.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    ///
    /// NOTE(review): this function is not declared `unsafe` even though it forwards `groups`
    /// to an unchecked aggregation; callers are still expected to pass valid groups.
    #[cfg(feature = "bitwise")]
    pub fn agg_and(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: relies on the caller passing groups that are valid for this column.
            unsafe { series.agg_and(grouping) }
        })
    }
932
    /// Aggregates every group in `groups` with the bitwise `agg_or` aggregation.
    ///
    /// The work is routed through `agg_with_unit_scalar`, which receives the aggregation as
    /// a callback.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    ///
    /// NOTE(review): this function is not declared `unsafe` even though it forwards `groups`
    /// to an unchecked aggregation; callers are still expected to pass valid groups.
    #[cfg(feature = "bitwise")]
    pub fn agg_or(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |series, grouping| {
            // SAFETY: relies on the caller passing groups that are valid for this column.
            unsafe { series.agg_or(grouping) }
        })
    }
939
    /// Aggregates every group in `groups` with the bitwise `agg_xor` aggregation of the
    /// materialized series.
    ///
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    ///
    /// NOTE(review): this function is not declared `unsafe` even though it forwards `groups`
    /// to an unchecked aggregation; callers are still expected to pass valid groups.
    #[cfg(feature = "bitwise")]
    pub fn agg_xor(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        let series = self.as_materialized_series();
        // SAFETY: relies on the caller passing groups that are valid for this column.
        let folded = unsafe { series.agg_xor(groups) };
        folded.into()
    }
948

949
    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
950
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
951
    }
952

953
    pub fn is_empty(&self) -> bool {
954
        self.len() == 0
955
    }
956

957
    pub fn reverse(&self) -> Column {
958
        match self {
959
            Column::Series(s) => s.reverse().into(),
960
            Column::Partitioned(s) => s.reverse().into(),
961
            Column::Scalar(_) => self.clone(),
962
        }
963
    }
964

965
    pub fn equals(&self, other: &Column) -> bool {
966
        // @scalar-opt
967
        self.as_materialized_series()
968
            .equals(other.as_materialized_series())
969
    }
970

971
    pub fn equals_missing(&self, other: &Column) -> bool {
972
        // @scalar-opt
973
        self.as_materialized_series()
974
            .equals_missing(other.as_materialized_series())
975
    }
976

977
    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
978
        // @scalar-opt
979
        match self {
980
            Column::Series(s) => s.set_sorted_flag(sorted),
981
            Column::Partitioned(s) => s.set_sorted_flag(sorted),
982
            Column::Scalar(_) => {},
983
        }
984
    }
985

986
    pub fn get_flags(&self) -> StatisticsFlags {
987
        match self {
988
            Column::Series(s) => s.get_flags(),
989
            // @partition-opt
990
            Column::Partitioned(_) => StatisticsFlags::empty(),
991
            Column::Scalar(_) => {
992
                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
993
            },
994
        }
995
    }
996

997
    /// Returns whether the flags were set
998
    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
999
        match self {
1000
            Column::Series(s) => {
1001
                s.set_flags(flags);
1002
                true
1003
            },
1004
            // @partition-opt
1005
            Column::Partitioned(_) => false,
1006
            Column::Scalar(_) => false,
1007
        }
1008
    }
1009

1010
    pub fn vec_hash(
1011
        &self,
1012
        build_hasher: PlSeedableRandomStateQuality,
1013
        buf: &mut Vec<u64>,
1014
    ) -> PolarsResult<()> {
1015
        // @scalar-opt?
1016
        self.as_materialized_series().vec_hash(build_hasher, buf)
1017
    }
1018

1019
    pub fn vec_hash_combine(
1020
        &self,
1021
        build_hasher: PlSeedableRandomStateQuality,
1022
        hashes: &mut [u64],
1023
    ) -> PolarsResult<()> {
1024
        // @scalar-opt?
1025
        self.as_materialized_series()
1026
            .vec_hash_combine(build_hasher, hashes)
1027
    }
1028

1029
    /// Appends `other` to this column in place.
    ///
    /// The column is converted into its materialized series form first; any error from the
    /// series-level `append` is propagated.
    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        let target = self.into_materialized_series();
        target.append(other.as_materialized_series())?;
        Ok(self)
    }
1035
    /// Appends `other` to this column in place, consuming `other`.
    ///
    /// The column is converted into its materialized series form first; any error from the
    /// series-level `append_owned` is propagated.
    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
        let target = self.into_materialized_series();
        target.append_owned(other.take_materialized_series())?;
        Ok(self)
    }
1040

1041
    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
1042
        if self.is_empty() {
1043
            return IdxCa::from_vec(self.name().clone(), Vec::new());
1044
        }
1045

1046
        if self.null_count() == self.len() {
1047
            // We might need to maintain order so just respect the descending parameter.
1048
            let values = if options.descending {
1049
                (0..self.len() as IdxSize).rev().collect()
1050
            } else {
1051
                (0..self.len() as IdxSize).collect()
1052
            };
1053

1054
            return IdxCa::from_vec(self.name().clone(), values);
1055
        }
1056

1057
        let is_sorted = Some(self.is_sorted_flag());
1058
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
1059
            return self.as_materialized_series().arg_sort(options);
1060
        };
1061

1062
        // Fast path: the data is sorted.
1063
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
1064
        let invert = options.descending != is_sorted_dsc;
1065

1066
        let mut values = Vec::with_capacity(self.len());
1067

1068
        #[inline(never)]
1069
        fn extend(
1070
            start: IdxSize,
1071
            end: IdxSize,
1072
            slf: &Column,
1073
            values: &mut Vec<IdxSize>,
1074
            is_only_nulls: bool,
1075
            invert: bool,
1076
            maintain_order: bool,
1077
        ) {
1078
            debug_assert!(start <= end);
1079
            debug_assert!(start as usize <= slf.len());
1080
            debug_assert!(end as usize <= slf.len());
1081

1082
            if !invert || is_only_nulls {
1083
                values.extend(start..end);
1084
                return;
1085
            }
1086

1087
            // If we don't have to maintain order but we have to invert. Just flip it around.
1088
            if !maintain_order {
1089
                values.extend((start..end).rev());
1090
                return;
1091
            }
1092

1093
            // If we want to maintain order but we also needs to invert, we need to invert
1094
            // per group of items.
1095
            //
1096
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
1097
            // just do a single traversal.
1098
            let arg_unique = slf
1099
                .slice(start as i64, (end - start) as usize)
1100
                .arg_unique()
1101
                .unwrap();
1102

1103
            assert!(!arg_unique.has_nulls());
1104

1105
            let num_unique = arg_unique.len();
1106

1107
            // Fast path: all items are unique.
1108
            if num_unique == (end - start) as usize {
1109
                values.extend((start..end).rev());
1110
                return;
1111
            }
1112

1113
            if num_unique == 1 {
1114
                values.extend(start..end);
1115
                return;
1116
            }
1117

1118
            let mut prev_idx = end - start;
1119
            for chunk in arg_unique.downcast_iter() {
1120
                for &idx in chunk.values().as_slice().iter().rev() {
1121
                    values.extend(start + idx..start + prev_idx);
1122
                    prev_idx = idx;
1123
                }
1124
            }
1125
        }
1126
        macro_rules! extend {
1127
            ($start:expr, $end:expr) => {
1128
                extend!($start, $end, is_only_nulls = false);
1129
            };
1130
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
1131
                extend(
1132
                    $start,
1133
                    $end,
1134
                    self,
1135
                    &mut values,
1136
                    $is_only_nulls,
1137
                    invert,
1138
                    options.maintain_order,
1139
                );
1140
            };
1141
        }
1142

1143
        let length = self.len() as IdxSize;
1144
        let null_count = self.null_count() as IdxSize;
1145

1146
        if null_count == 0 {
1147
            extend!(0, length);
1148
        } else {
1149
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
1150
            match (options.nulls_last, has_nulls_last) {
1151
                (true, true) => {
1152
                    // Current: Nulls last, Wanted: Nulls last
1153
                    extend!(0, length - null_count);
1154
                    extend!(length - null_count, length, is_only_nulls = true);
1155
                },
1156
                (true, false) => {
1157
                    // Current: Nulls first, Wanted: Nulls last
1158
                    extend!(null_count, length);
1159
                    extend!(0, null_count, is_only_nulls = true);
1160
                },
1161
                (false, true) => {
1162
                    // Current: Nulls last, Wanted: Nulls first
1163
                    extend!(length - null_count, length, is_only_nulls = true);
1164
                    extend!(0, length - null_count);
1165
                },
1166
                (false, false) => {
1167
                    // Current: Nulls first, Wanted: Nulls first
1168
                    extend!(0, null_count, is_only_nulls = true);
1169
                    extend!(null_count, length);
1170
                },
1171
            }
1172
        }
1173

1174
        // @NOTE: This can theoretically be pushed into the previous operation but it is really
1175
        // worth it... probably not...
1176
        if let Some(limit) = options.limit {
1177
            let limit = limit.min(length);
1178
            values.truncate(limit as usize);
1179
        }
1180

1181
        IdxCa::from_vec(self.name().clone(), values)
1182
    }
1183

1184
    pub fn arg_sort_multiple(
1185
        &self,
1186
        by: &[Column],
1187
        options: &SortMultipleOptions,
1188
    ) -> PolarsResult<IdxCa> {
1189
        // @scalar-opt
1190
        self.as_materialized_series().arg_sort_multiple(by, options)
1191
    }
1192

1193
    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
1194
        match self {
1195
            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
1196
            _ => self.as_materialized_series().arg_unique(),
1197
        }
1198
    }
1199

1200
    /// Forwards to `bit_repr` of the materialized series.
    pub fn bit_repr(&self) -> Option<BitRepr> {
        // @scalar-opt
        let series = self.as_materialized_series();
        series.bit_repr()
    }
1204

1205
    /// Wraps this column in a [`DataFrame`] that has it as its only column.
    pub fn into_frame(self) -> DataFrame {
        let height = self.len();
        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
        unsafe { DataFrame::new_no_checks(height, vec![self]) }
    }
1209

1210
    /// Extends this column in place with the values of `other`.
    ///
    /// The column is converted into its materialized series form first; any error from the
    /// series-level `extend` is propagated.
    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        let target = self.into_materialized_series();
        target.extend(other.as_materialized_series())?;
        Ok(self)
    }
1216

1217
    pub fn rechunk(&self) -> Column {
1218
        match self {
1219
            Column::Series(s) => s.rechunk().into(),
1220
            Column::Partitioned(s) => {
1221
                if let Some(s) = s.lazy_as_materialized_series() {
1222
                    // This should always hold for partitioned.
1223
                    debug_assert_eq!(s.n_chunks(), 1)
1224
                }
1225
                self.clone()
1226
            },
1227
            Column::Scalar(s) => {
1228
                if s.lazy_as_materialized_series()
1229
                    .filter(|x| x.n_chunks() > 1)
1230
                    .is_some()
1231
                {
1232
                    Column::Scalar(ScalarColumn::new(
1233
                        s.name().clone(),
1234
                        s.scalar().clone(),
1235
                        s.len(),
1236
                    ))
1237
                } else {
1238
                    self.clone()
1239
                }
1240
            },
1241
        }
1242
    }
1243

1244
    pub fn explode(&self, skip_empty: bool) -> PolarsResult<Column> {
1245
        self.as_materialized_series()
1246
            .explode(skip_empty)
1247
            .map(Column::from)
1248
    }
1249
    /// Forwards to `implode` of the materialized series.
    pub fn implode(&self) -> PolarsResult<ListChunked> {
        let series = self.as_materialized_series();
        series.implode()
    }
1252

1253
    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
1254
        // @scalar-opt
1255
        self.as_materialized_series()
1256
            .fill_null(strategy)
1257
            .map(Column::from)
1258
    }
1259

1260
    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
1261
        // @scalar-opt
1262
        self.as_materialized_series()
1263
            .divide(rhs.as_materialized_series())
1264
            .map(Column::from)
1265
    }
1266

1267
    /// Forwards to `shift` of the materialized series with `periods` and wraps the result
    /// in a column.
    pub fn shift(&self, periods: i64) -> Column {
        // @scalar-opt
        let shifted = self.as_materialized_series().shift(periods);
        shifted.into()
    }
1271

1272
    /// Combines this column with `other` under `mask` through `zip_with` of the materialized
    /// series.
    #[cfg(feature = "zip_with")]
    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
        // @scalar-opt
        let lhs = self.as_materialized_series();
        let rhs = other.as_materialized_series();
        let zipped = lhs.zip_with(mask, rhs)?;
        Ok(Self::from(zipped))
    }
1279

1280
    /// Combines this column with `other` under `mask` through `zip_with_same_type` of the
    /// materialized series.
    #[cfg(feature = "zip_with")]
    pub fn zip_with_same_type(
        &self,
        mask: &ChunkedArray<BooleanType>,
        other: &Column,
    ) -> PolarsResult<Column> {
        // @scalar-opt
        let lhs = self.as_materialized_series();
        let rhs = other.as_materialized_series();
        let zipped = lhs.zip_with_same_type(mask, rhs)?;
        Ok(Column::from(zipped))
    }
1291

1292
    pub fn drop_nulls(&self) -> Column {
1293
        match self {
1294
            Column::Series(s) => s.drop_nulls().into_column(),
1295
            // @partition-opt
1296
            Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),
1297
            Column::Scalar(s) => s.drop_nulls().into_column(),
1298
        }
1299
    }
1300

1301
    /// Packs every element into a list.
1302
    pub fn as_list(&self) -> ListChunked {
1303
        // @scalar-opt
1304
        // @partition-opt
1305
        self.as_materialized_series().as_list()
1306
    }
1307

1308
    pub fn is_sorted_flag(&self) -> IsSorted {
1309
        match self {
1310
            Column::Series(s) => s.is_sorted_flag(),
1311
            Column::Partitioned(s) => s.partitions().is_sorted_flag(),
1312
            Column::Scalar(_) => IsSorted::Ascending,
1313
        }
1314
    }
1315

1316
    pub fn unique(&self) -> PolarsResult<Column> {
1317
        match self {
1318
            Column::Series(s) => s.unique().map(Column::from),
1319
            // @partition-opt
1320
            Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),
1321
            Column::Scalar(s) => {
1322
                _ = s.as_single_value_series().unique()?;
1323
                if s.is_empty() {
1324
                    return Ok(s.clone().into_column());
1325
                }
1326

1327
                Ok(s.resize(1).into_column())
1328
            },
1329
        }
1330
    }
1331
    pub fn unique_stable(&self) -> PolarsResult<Column> {
1332
        match self {
1333
            Column::Series(s) => s.unique_stable().map(Column::from),
1334
            // @partition-opt
1335
            Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),
1336
            Column::Scalar(s) => {
1337
                _ = s.as_single_value_series().unique_stable()?;
1338
                if s.is_empty() {
1339
                    return Ok(s.clone().into_column());
1340
                }
1341

1342
                Ok(s.resize(1).into_column())
1343
            },
1344
        }
1345
    }
1346

1347
    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
1348
        // @scalar-opt
1349
        self.as_materialized_series()
1350
            .reshape_list(dimensions)
1351
            .map(Self::from)
1352
    }
1353

1354
    /// Forwards to `reshape_array` of the materialized series with `dimensions` and wraps
    /// the result in a column.
    #[cfg(feature = "dtype-array")]
    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        let reshaped = self.as_materialized_series().reshape_array(dimensions)?;
        Ok(Self::from(reshaped))
    }
1361

1362
    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
1363
        // @scalar-opt
1364
        self.as_materialized_series()
1365
            .sort(sort_options)
1366
            .map(Self::from)
1367
    }
1368

1369
    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
1370
        match self {
1371
            Column::Series(s) => s.filter(filter).map(Column::from),
1372
            Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),
1373
            Column::Scalar(s) => {
1374
                if s.is_empty() {
1375
                    return Ok(s.clone().into_column());
1376
                }
1377

1378
                // Broadcasting
1379
                if filter.len() == 1 {
1380
                    return match filter.get(0) {
1381
                        Some(true) => Ok(s.clone().into_column()),
1382
                        _ => Ok(s.resize(0).into_column()),
1383
                    };
1384
                }
1385

1386
                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
1387
            },
1388
        }
1389
    }
1390

1391
    /// Forwards to `shuffle` of the materialized series with the optional `seed` and wraps
    /// the result in a column.
    #[cfg(feature = "random")]
    pub fn shuffle(&self, seed: Option<u64>) -> Self {
        // @scalar-opt
        let shuffled = self.as_materialized_series().shuffle(seed);
        shuffled.into()
    }
1396

1397
    /// Forwards to `sample_frac` of the materialized series, passing every argument through
    /// unchanged, and wraps the result in a column.
    #[cfg(feature = "random")]
    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        let series = self.as_materialized_series();
        let sampled = series.sample_frac(frac, with_replacement, shuffle, seed)?;
        Ok(Self::from(sampled))
    }
1409

1410
    /// Forwards to `sample_n` of the materialized series, passing every argument through
    /// unchanged, and wraps the result in a column.
    #[cfg(feature = "random")]
    pub fn sample_n(
        &self,
        n: usize,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        let series = self.as_materialized_series();
        let sampled = series.sample_n(n, with_replacement, shuffle, seed)?;
        Ok(Self::from(sampled))
    }
1422

1423
    pub fn gather_every(&self, n: usize, offset: usize) -> PolarsResult<Column> {
1424
        polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive");
1425
        if self.len().saturating_sub(offset) == 0 {
1426
            return Ok(self.clear());
1427
        }
1428

1429
        match self {
1430
            Column::Series(s) => Ok(s.gather_every(n, offset)?.into()),
1431
            Column::Partitioned(s) => {
1432
                Ok(s.as_materialized_series().gather_every(n, offset)?.into())
1433
            },
1434
            Column::Scalar(s) => {
1435
                let total = s.len() - offset;
1436
                Ok(s.resize(1 + (total - 1) / n).into())
1437
            },
1438
        }
1439
    }
1440

1441
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
1442
        if self.is_empty() {
1443
            return Ok(Self::new_scalar(
1444
                self.name().clone(),
1445
                Scalar::new(self.dtype().clone(), value.into_static()),
1446
                n,
1447
            ));
1448
        }
1449

1450
        match self {
1451
            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
1452
            Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),
1453
            Column::Scalar(s) => {
1454
                if s.scalar().as_any_value() == value {
1455
                    Ok(s.resize(s.len() + n).into())
1456
                } else {
1457
                    s.as_materialized_series()
1458
                        .extend_constant(value, n)
1459
                        .map(Column::from)
1460
                }
1461
            },
1462
        }
1463
    }
1464

1465
    /// Applies the elementwise `is_finite` check and returns the resulting boolean mask.
    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|values| values.is_finite())
    }
1468
    /// Applies the elementwise `is_infinite` check and returns the resulting boolean mask.
    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|values| values.is_infinite())
    }
1471
    /// Applies the elementwise `is_nan` check and returns the resulting boolean mask.
    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|values| values.is_nan())
    }
1474
    /// Applies the elementwise `is_not_nan` check and returns the resulting boolean mask.
    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|values| values.is_not_nan())
    }
1477

1478
    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
1479
    where
1480
        T: Num + NumCast,
1481
    {
1482
        // @scalar-opt
1483
        self.as_materialized_series()
1484
            .wrapping_trunc_div_scalar(rhs)
1485
            .into()
1486
    }
1487

1488
    /// Forwards to `product` of the materialized series.
    pub fn product(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        let series = self.as_materialized_series();
        series.product()
    }
1492

1493
    /// Forwards to `phys_iter` of the materialized series.
    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
        // @scalar-opt
        let series = self.as_materialized_series();
        series.phys_iter()
    }
1497

1498
    #[inline]
1499
    pub fn get(&self, index: usize) -> PolarsResult<AnyValue<'_>> {
1500
        polars_ensure!(index < self.len(), oob = index, self.len());
1501

1502
        // SAFETY: Bounds check done just before.
1503
        Ok(unsafe { self.get_unchecked(index) })
1504
    }
1505
    /// # Safety
1506
    ///
1507
    /// Does not perform bounds check on `index`
1508
    #[inline(always)]
1509
    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1510
        debug_assert!(index < self.len());
1511

1512
        match self {
1513
            Column::Series(s) => unsafe { s.get_unchecked(index) },
1514
            Column::Partitioned(s) => unsafe { s.get_unchecked(index) },
1515
            Column::Scalar(s) => s.scalar().as_any_value(),
1516
        }
1517
    }
1518

1519
    /// Forwards to `get_object` of the materialized series for the row at `index`.
    #[cfg(feature = "object")]
    pub fn get_object(
        &self,
        index: usize,
    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
        let series = self.as_materialized_series();
        series.get_object(index)
    }
1526

1527
    /// Combines this column with `rhs` using the `&` operator, broadcasting where needed.
    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left & right)
    }
1530
    /// Combines this column with `rhs` using the `|` operator, broadcasting where needed.
    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left | right)
    }
1533
    /// Combines this column with `rhs` using the `^` operator, broadcasting where needed.
    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |left, right| left ^ right)
    }
1536

1537
    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
1538
        match (self, other) {
1539
            (Column::Series(lhs), Column::Series(rhs)) => {
1540
                lhs.take().try_add_owned(rhs.take()).map(Column::from)
1541
            },
1542
            (lhs, rhs) => lhs + rhs,
1543
        }
1544
    }
1545
    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
1546
        match (self, other) {
1547
            (Column::Series(lhs), Column::Series(rhs)) => {
1548
                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
1549
            },
1550
            (lhs, rhs) => lhs - rhs,
1551
        }
1552
    }
1553
    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
1554
        match (self, other) {
1555
            (Column::Series(lhs), Column::Series(rhs)) => {
1556
                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
1557
            },
1558
            (lhs, rhs) => lhs * rhs,
1559
        }
1560
    }
1561

1562
    /// Returns the string form of the value at `index`.
    ///
    /// Fails with the bounds error of [`Column::get`] when `index` is out of range.
    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<'_, str>> {
        let value = self.get(index)?;
        Ok(value.str_value())
    }
1565

1566
    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
1567
        match self {
1568
            Column::Series(s) => s.min_reduce(),
1569
            Column::Partitioned(s) => s.min_reduce(),
1570
            Column::Scalar(s) => {
1571
                // We don't really want to deal with handling the full semantics here so we just
1572
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1573
                s.as_single_value_series().min_reduce()
1574
            },
1575
        }
1576
    }
1577
    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
1578
        match self {
1579
            Column::Series(s) => s.max_reduce(),
1580
            Column::Partitioned(s) => s.max_reduce(),
1581
            Column::Scalar(s) => {
1582
                // We don't really want to deal with handling the full semantics here so we just
1583
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1584
                s.as_single_value_series().max_reduce()
1585
            },
1586
        }
1587
    }
1588
    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
1589
        match self {
1590
            Column::Series(s) => s.median_reduce(),
1591
            Column::Partitioned(s) => s.as_materialized_series().median_reduce(),
1592
            Column::Scalar(s) => {
1593
                // We don't really want to deal with handling the full semantics here so we just
1594
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1595
                s.as_single_value_series().median_reduce()
1596
            },
1597
        }
1598
    }
1599
    pub fn mean_reduce(&self) -> Scalar {
1600
        match self {
1601
            Column::Series(s) => s.mean_reduce(),
1602
            Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),
1603
            Column::Scalar(s) => {
1604
                // We don't really want to deal with handling the full semantics here so we just
1605
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1606
                s.as_single_value_series().mean_reduce()
1607
            },
1608
        }
1609
    }
1610
    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1611
        match self {
1612
            Column::Series(s) => s.std_reduce(ddof),
1613
            Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),
1614
            Column::Scalar(s) => {
1615
                // We don't really want to deal with handling the full semantics here so we just
1616
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1617
                let n = s.len().min(ddof as usize + 1);
1618
                s.as_n_values_series(n).std_reduce(ddof)
1619
            },
1620
        }
1621
    }
1622
    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
1623
        match self {
1624
            Column::Series(s) => s.var_reduce(ddof),
1625
            Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),
1626
            Column::Scalar(s) => {
1627
                // We don't really want to deal with handling the full semantics here so we just
1628
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
1629
                let n = s.len().min(ddof as usize + 1);
1630
                s.as_n_values_series(n).var_reduce(ddof)
1631
            },
1632
        }
1633
    }
1634
    /// Reduces this column to its sum, returned as a [`Scalar`].
    ///
    /// The column is always materialized to a series first; no partitioned or scalar fast path
    /// exists yet.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying series reduction.
    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
        // @partition-opt
        // @scalar-opt
        let materialized = self.as_materialized_series();
        materialized.sum_reduce()
    }
1639
    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
1640
        match self {
1641
            Column::Series(s) => s.and_reduce(),
1642
            Column::Partitioned(s) => s.and_reduce(),
1643
            Column::Scalar(s) => {
1644
                // We don't really want to deal with handling the full semantics here so we just
1645
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1646
                s.as_single_value_series().and_reduce()
1647
            },
1648
        }
1649
    }
1650
    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
1651
        match self {
1652
            Column::Series(s) => s.or_reduce(),
1653
            Column::Partitioned(s) => s.or_reduce(),
1654
            Column::Scalar(s) => {
1655
                // We don't really want to deal with handling the full semantics here so we just
1656
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1657
                s.as_single_value_series().or_reduce()
1658
            },
1659
        }
1660
    }
1661
    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
1662
        match self {
1663
            Column::Series(s) => s.xor_reduce(),
1664
            // @partition-opt
1665
            Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),
1666
            Column::Scalar(s) => {
1667
                // We don't really want to deal with handling the full semantics here so we just
1668
                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1669
                //
1670
                // We have to deal with the fact that xor is 0 if there is an even number of
1671
                // elements and the value if there is an odd number of elements. If there are zero
1672
                // elements the result should be `null`.
1673
                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
1674
            },
1675
        }
1676
    }
1677
    pub fn n_unique(&self) -> PolarsResult<usize> {
1678
        match self {
1679
            Column::Series(s) => s.n_unique(),
1680
            Column::Partitioned(s) => s.partitions().n_unique(),
1681
            Column::Scalar(s) => s.as_single_value_series().n_unique(),
1682
        }
1683
    }
1684
    /// Reduces this column to the requested `quantile`, computed with `method`.
    ///
    /// The column is always materialized to a series first.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying series reduction.
    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
        let materialized = self.as_materialized_series();
        materialized.quantile_reduce(quantile, method)
    }
1688

1689
    pub(crate) fn estimated_size(&self) -> usize {
1690
        // @scalar-opt
1691
        self.as_materialized_series().estimated_size()
1692
    }
1693

1694
    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
1695
        match self {
1696
            Column::Series(s) => s.sort_with(options).map(Self::from),
1697
            // @partition-opt
1698
            Column::Partitioned(s) => s
1699
                .as_materialized_series()
1700
                .sort_with(options)
1701
                .map(Self::from),
1702
            Column::Scalar(s) => {
1703
                // This makes this function throw the same errors as Series::sort_with
1704
                _ = s.as_single_value_series().sort_with(options)?;
1705

1706
                Ok(self.clone())
1707
            },
1708
        }
1709
    }
1710

1711
    pub fn map_unary_elementwise_to_bool(
1712
        &self,
1713
        f: impl Fn(&Series) -> BooleanChunked,
1714
    ) -> BooleanChunked {
1715
        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
1716
            .unwrap()
1717
    }
1718
    pub fn try_map_unary_elementwise_to_bool(
1719
        &self,
1720
        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
1721
    ) -> PolarsResult<BooleanChunked> {
1722
        match self {
1723
            Column::Series(s) => f(s),
1724
            Column::Partitioned(s) => f(s.as_materialized_series()),
1725
            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
1726
        }
1727
    }
1728

1729
    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
1730
        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
1731
    }
1732
    pub fn try_apply_unary_elementwise(
1733
        &self,
1734
        f: impl Fn(&Series) -> PolarsResult<Series>,
1735
    ) -> PolarsResult<Column> {
1736
        match self {
1737
            Column::Series(s) => f(s).map(Column::from),
1738
            Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),
1739
            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
1740
                f(&s.as_single_value_series())?,
1741
                s.len(),
1742
            )
1743
            .into()),
1744
        }
1745
    }
1746

1747
    pub fn apply_broadcasting_binary_elementwise(
1748
        &self,
1749
        other: &Self,
1750
        op: impl Fn(&Series, &Series) -> Series,
1751
    ) -> PolarsResult<Column> {
1752
        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
1753
    }
1754
    pub fn try_apply_broadcasting_binary_elementwise(
1755
        &self,
1756
        other: &Self,
1757
        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1758
    ) -> PolarsResult<Column> {
1759
        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
1760
            match (a.len(), b.len()) {
1761
                // broadcasting
1762
                (1, o) | (o, 1) => Ok(o),
1763
                // equal
1764
                (a, b) if a == b => Ok(a),
1765
                // unequal
1766
                (a, b) => {
1767
                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
1768
                },
1769
            }
1770
        }
1771

1772
        // Here we rely on the underlying broadcast operations.
1773
        let length = output_length(self, other)?;
1774
        match (self, other) {
1775
            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
1776
            (Column::Series(lhs), Column::Scalar(rhs)) => {
1777
                op(lhs, &rhs.as_single_value_series()).map(Column::from)
1778
            },
1779
            (Column::Scalar(lhs), Column::Series(rhs)) => {
1780
                op(&lhs.as_single_value_series(), rhs).map(Column::from)
1781
            },
1782
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1783
                let lhs = lhs.as_single_value_series();
1784
                let rhs = rhs.as_single_value_series();
1785

1786
                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
1787
            },
1788
            // @partition-opt
1789
            (lhs, rhs) => {
1790
                op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1791
            },
1792
        }
1793
    }
1794

1795
    pub fn apply_binary_elementwise(
1796
        &self,
1797
        other: &Self,
1798
        f: impl Fn(&Series, &Series) -> Series,
1799
        f_lb: impl Fn(&Scalar, &Series) -> Series,
1800
        f_rb: impl Fn(&Series, &Scalar) -> Series,
1801
    ) -> Column {
1802
        self.try_apply_binary_elementwise(
1803
            other,
1804
            |lhs, rhs| Ok(f(lhs, rhs)),
1805
            |lhs, rhs| Ok(f_lb(lhs, rhs)),
1806
            |lhs, rhs| Ok(f_rb(lhs, rhs)),
1807
        )
1808
        .unwrap()
1809
    }
1810
    pub fn try_apply_binary_elementwise(
1811
        &self,
1812
        other: &Self,
1813
        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
1814
        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
1815
        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
1816
    ) -> PolarsResult<Column> {
1817
        debug_assert_eq!(self.len(), other.len());
1818

1819
        match (self, other) {
1820
            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
1821
            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
1822
            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
1823
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
1824
                let lhs = lhs.as_single_value_series();
1825
                let rhs = rhs.as_single_value_series();
1826

1827
                Ok(
1828
                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
1829
                        .into_column(),
1830
                )
1831
            },
1832
            // @partition-opt
1833
            (lhs, rhs) => {
1834
                f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
1835
            },
1836
        }
1837
    }
1838

1839
    /// Returns an approximation of the number of unique values in this column.
    ///
    /// For a scalar column the answer is known exactly: one distinct value when the column is
    /// non-empty and zero when it is empty.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying series implementation (for scalar
    /// columns the series implementation is still invoked on a single-value series so that the
    /// same errors are raised).
    #[cfg(feature = "approx_unique")]
    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        match self {
            Column::Series(s) => s.approx_n_unique(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),
            Column::Scalar(s) => {
                // @NOTE: We do this for the error handling.
                s.as_single_value_series().approx_n_unique()?;
                // A repeated value is one distinct value, but a zero-length scalar column
                // (e.g. `Column::default()`) holds no values at all and must report 0.
                // The cast is lossless: the value is either 0 or 1.
                Ok(s.len().min(1) as IdxSize)
            },
        }
    }
1852

1853
    pub fn n_chunks(&self) -> usize {
1854
        match self {
1855
            Column::Series(s) => s.n_chunks(),
1856
            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
1857
            Column::Partitioned(s) => {
1858
                if let Some(s) = s.lazy_as_materialized_series() {
1859
                    // This should always hold for partitioned.
1860
                    debug_assert_eq!(s.n_chunks(), 1)
1861
                }
1862
                1
1863
            },
1864
        }
1865
    }
1866

1867
    #[expect(clippy::wrong_self_convention)]
1868
    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
1869
        // @scalar-opt
1870
        self.as_materialized_series().into_total_ord_inner()
1871
    }
1872
    #[expect(unused, clippy::wrong_self_convention)]
1873
    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
1874
        // @scalar-opt
1875
        self.as_materialized_series().into_total_eq_inner()
1876
    }
1877

1878
    /// Consumes the column and converts it into a single arrow array at the given
    /// `compat_level`.
    ///
    /// The materialized series is rechunked first when it has more than one chunk, and chunk 0
    /// of the result is then exported.
    pub fn rechunk_to_arrow(self, compat_level: CompatLevel) -> Box<dyn Array> {
        // Rechunk to one chunk if necessary
        let series = match self.take_materialized_series() {
            chunked if chunked.n_chunks() > 1 => chunked.rechunk(),
            single => single,
        };
        series.to_arrow(0, compat_level)
    }
1886

1887
    pub fn trim_lists_to_normalized_offsets(&self) -> Option<Column> {
1888
        self.as_materialized_series()
1889
            .trim_lists_to_normalized_offsets()
1890
            .map(Column::from)
1891
    }
1892

1893
    pub fn propagate_nulls(&self) -> Option<Column> {
1894
        self.as_materialized_series()
1895
            .propagate_nulls()
1896
            .map(Column::from)
1897
    }
1898
}
1899

1900
impl Default for Column {
1901
    fn default() -> Self {
1902
        Self::new_scalar(
1903
            PlSmallStr::EMPTY,
1904
            Scalar::new(DataType::Int64, AnyValue::Null),
1905
            0,
1906
        )
1907
    }
1908
}
1909

1910
impl PartialEq for Column {
1911
    fn eq(&self, other: &Self) -> bool {
1912
        // @scalar-opt
1913
        self.as_materialized_series()
1914
            .eq(other.as_materialized_series())
1915
    }
1916
}
1917

1918
impl From<Series> for Column {
1919
    #[inline]
1920
    fn from(series: Series) -> Self {
1921
        // We instantiate a Scalar Column if the Series is length is 1. This makes it possible for
1922
        // future operations to be faster.
1923
        if series.len() == 1 {
1924
            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
1925
        }
1926

1927
        Self::Series(SeriesColumn::new(series))
1928
    }
1929
}
1930

1931
impl<T: IntoSeries> IntoColumn for T {
1932
    #[inline]
1933
    fn into_column(self) -> Column {
1934
        self.into_series().into()
1935
    }
1936
}
1937

1938
/// Converting a [`Column`] into a [`Column`] is the identity.
impl IntoColumn for Column {
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}
1944

1945
/// We don't want to serialize the scalar columns. So this helps pretend that columns are always
1946
/// initialized without implementing From<Column> for Series.
1947
///
1948
/// Those casts should be explicit.
1949
#[derive(Clone)]
1950
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
1951
#[cfg_attr(feature = "serde", serde(into = "Series"))]
1952
struct _SerdeSeries(Series);
1953

1954
impl From<Column> for _SerdeSeries {
1955
    #[inline]
1956
    fn from(value: Column) -> Self {
1957
        Self(value.take_materialized_series())
1958
    }
1959
}
1960

1961
impl From<_SerdeSeries> for Series {
1962
    #[inline]
1963
    fn from(value: _SerdeSeries) -> Self {
1964
        value.0
1965
    }
1966
}
1967

1968
Product

Resources

Company