Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/mod.rs
6939 views
1
#![allow(ambiguous_glob_reexports)]
2
//! Domain specific language for the Lazy API.
3
#[cfg(feature = "dtype-categorical")]
4
pub mod cat;
5
6
#[cfg(feature = "dtype-categorical")]
7
pub use cat::*;
8
#[cfg(feature = "rolling_window_by")]
9
pub(crate) use polars_time::prelude::*;
10
11
mod arithmetic;
12
mod arity;
13
#[cfg(feature = "dtype-array")]
14
mod array;
15
pub mod binary;
16
#[cfg(feature = "bitwise")]
17
mod bitwise;
18
mod builder_dsl;
19
pub use builder_dsl::*;
20
mod datatype_expr;
21
#[cfg(feature = "temporal")]
22
pub mod dt;
23
mod expr;
24
mod format;
25
mod from;
26
pub mod function_expr;
27
pub mod functions;
28
mod list;
29
mod match_to_schema;
30
#[cfg(feature = "meta")]
31
mod meta;
32
mod name;
33
mod options;
34
#[cfg(feature = "python")]
35
pub mod python_dsl;
36
#[cfg(feature = "random")]
37
mod random;
38
mod scan_sources;
39
mod selector;
40
mod statistics;
41
#[cfg(feature = "strings")]
42
pub mod string;
43
#[cfg(feature = "dtype-struct")]
44
mod struct_;
45
pub mod udf;
46
47
use std::fmt::Debug;
48
use std::sync::Arc;
49
50
mod plan;
51
pub use arity::*;
52
#[cfg(feature = "dtype-array")]
53
pub use array::*;
54
pub use datatype_expr::DataTypeExpr;
55
pub use expr::*;
56
pub use function_expr::*;
57
pub use functions::*;
58
pub use list::*;
59
pub use match_to_schema::*;
60
#[cfg(feature = "meta")]
61
pub use meta::*;
62
pub use name::*;
63
pub use options::*;
64
pub use plan::*;
65
use polars_compute::rolling::QuantileMethod;
66
use polars_core::chunked_array::cast::CastOptions;
67
use polars_core::error::feature_gated;
68
use polars_core::prelude::*;
69
use polars_core::series::IsSorted;
70
#[cfg(feature = "diff")]
71
use polars_core::series::ops::NullBehavior;
72
#[cfg(feature = "is_close")]
73
use polars_utils::total_ord::TotalOrdWrap;
74
pub use selector::{DataTypeSelector, Selector, TimeUnitSet, TimeZoneSet};
75
#[cfg(feature = "dtype-struct")]
76
pub use struct_::*;
77
pub use udf::UserDefinedFunction;
78
mod file_scan;
79
pub use file_scan::*;
80
pub use scan_sources::{ScanSource, ScanSourceIter, ScanSourceRef, ScanSources};
81
82
pub use crate::plans::lit;
83
use crate::prelude::*;
84
85
impl Expr {
86
/// Compare `Expr` with other `Expr` on equality.
pub fn eq<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::Eq, other.into())
}

/// Compare `Expr` with other `Expr` on equality where `None == None`.
pub fn eq_missing<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::EqValidity, other.into())
}

/// Compare `Expr` with other `Expr` on non-equality.
pub fn neq<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::NotEq, other.into())
}

/// Compare `Expr` with other `Expr` on non-equality where `None == None`.
pub fn neq_missing<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::NotEqValidity, other.into())
}

/// Check if `Expr` < `Expr`.
pub fn lt<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::Lt, other.into())
}

/// Check if `Expr` > `Expr`.
pub fn gt<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::Gt, other.into())
}

/// Check if `Expr` >= `Expr`.
pub fn gt_eq<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::GtEq, other.into())
}

/// Check if `Expr` <= `Expr`.
pub fn lt_eq<E: Into<Expr>>(self, other: E) -> Expr {
    binary_expr(self, Operator::LtEq, other.into())
}
125
126
/// Negate `Expr` (boolean NOT).
#[allow(clippy::should_implement_trait)]
pub fn not(self) -> Expr {
    self.map_unary(BooleanFunction::Not)
}

/// Rename Column.
pub fn alias<S>(self, name: S) -> Expr
where
    S: Into<PlSmallStr>,
{
    Expr::Alias(Arc::new(self), name.into())
}

/// Run is_null operation on `Expr`: a boolean mask that is `true` for null values.
#[allow(clippy::wrong_self_convention)]
pub fn is_null(self) -> Self {
    self.map_unary(BooleanFunction::IsNull)
}

/// Run is_not_null operation on `Expr`: a boolean mask that is `true` for non-null values.
#[allow(clippy::wrong_self_convention)]
pub fn is_not_null(self) -> Self {
    self.map_unary(BooleanFunction::IsNotNull)
}

/// Drop null values.
pub fn drop_nulls(self) -> Self {
    self.map_unary(FunctionExpr::DropNulls)
}

/// Drop NaN values.
pub fn drop_nans(self) -> Self {
    self.map_unary(FunctionExpr::DropNans)
}
161
162
/// Get the number of unique values in the groups.
pub fn n_unique(self) -> Self {
    AggExpr::NUnique(Arc::new(self)).into()
}

/// Get the first value in the group.
pub fn first(self) -> Self {
    AggExpr::First(Arc::new(self)).into()
}

/// Get the last value in the group.
pub fn last(self) -> Self {
    AggExpr::Last(Arc::new(self)).into()
}

/// GroupBy the group to a Series.
pub fn implode(self) -> Self {
    AggExpr::Implode(Arc::new(self)).into()
}

/// Compute the quantile per group.
///
/// `quantile` is itself an expression so it can differ per evaluation context;
/// `method` selects the interpolation strategy.
pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self {
    AggExpr::Quantile {
        expr: Arc::new(self),
        quantile: Arc::new(quantile),
        method,
    }
    .into()
}

/// Get the group indexes of the group by operation.
pub fn agg_groups(self) -> Self {
    AggExpr::AggGroups(Arc::new(self)).into()
}
196
197
/// Alias for `explode`.
pub fn flatten(self) -> Self {
    self.explode()
}

/// Explode the String/List column.
pub fn explode(self) -> Self {
    Expr::Explode {
        input: Arc::new(self),
        // empty lists still produce a (null) row when not skipped
        skip_empty: false,
    }
}

/// Slice the Series.
/// `offset` may be negative.
pub fn slice<E: Into<Expr>, F: Into<Expr>>(self, offset: E, length: F) -> Self {
    Expr::Slice {
        input: Arc::new(self),
        offset: Arc::new(offset.into()),
        length: Arc::new(length.into()),
    }
}

/// Append expressions. This is done by adding the chunks of `other` to this [`Series`].
pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
    self.map_binary(FunctionExpr::Append { upcast }, other.into())
}

/// Collect all chunks into a single chunk before continuing.
pub fn rechunk(self) -> Self {
    self.map_unary(FunctionExpr::Rechunk)
}
229
230
/// Get the first `n` elements of the Expr result.
231
pub fn head(self, length: Option<usize>) -> Self {
232
self.slice(lit(0), lit(length.unwrap_or(10) as u64))
233
}
234
235
/// Get the last `n` elements of the Expr result.
236
pub fn tail(self, length: Option<usize>) -> Self {
237
let len = length.unwrap_or(10);
238
self.slice(lit(-(len as i64)), lit(len as u64))
239
}
240
241
/// Get unique values of this expression (order not guaranteed).
pub fn unique(self) -> Self {
    self.map_unary(FunctionExpr::Unique(false))
}

/// Get unique values of this expression, while maintaining order.
/// This requires more work than [`Expr::unique`].
pub fn unique_stable(self) -> Self {
    self.map_unary(FunctionExpr::Unique(true))
}

/// Get the first index of unique values of this expression.
pub fn arg_unique(self) -> Self {
    self.map_unary(FunctionExpr::ArgUnique)
}

/// Get the index value that has the minimum value.
pub fn arg_min(self) -> Self {
    self.map_unary(FunctionExpr::ArgMin)
}

/// Get the index value that has the maximum value.
pub fn arg_max(self) -> Self {
    self.map_unary(FunctionExpr::ArgMax)
}

/// Get the index values that would sort this expression.
pub fn arg_sort(self, descending: bool, nulls_last: bool) -> Self {
    self.map_unary(FunctionExpr::ArgSort {
        descending,
        nulls_last,
    })
}

#[cfg(feature = "index_of")]
/// Find the index of a value.
pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
    self.map_binary(FunctionExpr::IndexOf, element.into())
}

#[cfg(feature = "search_sorted")]
/// Find indices where elements should be inserted to maintain order.
pub fn search_sorted<E: Into<Expr>>(
    self,
    element: E,
    side: SearchSortedSide,
    descending: bool,
) -> Expr {
    self.map_binary(
        FunctionExpr::SearchSorted { side, descending },
        element.into(),
    )
}
294
295
/// Cast expression to another data type.
/// Throws an error if conversion had overflows.
/// Returns an Error if cast is invalid on rows after predicates are pushed down.
pub fn strict_cast(self, dtype: impl Into<DataTypeExpr>) -> Self {
    Expr::Cast {
        expr: Arc::new(self),
        dtype: dtype.into(),
        options: CastOptions::Strict,
    }
}

/// Cast expression to another data type (non-strict: invalid values become null).
pub fn cast(self, dtype: impl Into<DataTypeExpr>) -> Self {
    Expr::Cast {
        expr: Arc::new(self),
        dtype: dtype.into(),
        options: CastOptions::NonStrict,
    }
}

/// Cast expression to another data type with explicit [`CastOptions`].
pub fn cast_with_options(
    self,
    dtype: impl Into<DataTypeExpr>,
    cast_options: CastOptions,
) -> Self {
    Expr::Cast {
        expr: Arc::new(self),
        dtype: dtype.into(),
        options: cast_options,
    }
}
327
328
/// Take the values by idx.
pub fn gather<E: Into<Expr>>(self, idx: E) -> Self {
    Expr::Gather {
        expr: Arc::new(self),
        idx: Arc::new(idx.into()),
        returns_scalar: false,
    }
}

/// Take the values by a single index; the result is a scalar.
pub fn get<E: Into<Expr>>(self, idx: E) -> Self {
    Expr::Gather {
        expr: Arc::new(self),
        idx: Arc::new(idx.into()),
        returns_scalar: true,
    }
}

/// Sort with given options.
///
/// # Example
///
/// ```rust
/// # use polars_core::prelude::*;
/// # use polars_lazy::prelude::*;
/// # fn main() -> PolarsResult<()> {
/// let lf = df! {
///     "a" => [Some(5), Some(4), Some(3), Some(2), None]
/// }?
/// .lazy();
///
/// let sorted = lf
///     .select(
///         vec![col("a").sort(SortOptions::default())],
///     )
///     .collect()?;
///
/// assert_eq!(
///     sorted,
///     df! {
///         "a" => [None, Some(2), Some(3), Some(4), Some(5)]
///     }?
/// );
/// # Ok(())
/// # }
/// ```
/// See [`SortOptions`] for more options.
pub fn sort(self, options: SortOptions) -> Self {
    Expr::Sort {
        expr: Arc::new(self),
        options,
    }
}
381
382
/// Returns the `k` largest elements.
///
/// This has time complexity `O(n + k log(n))`.
#[cfg(feature = "top_k")]
pub fn top_k(self, k: Expr) -> Self {
    self.map_binary(FunctionExpr::TopK { descending: false }, k)
}

/// Returns the `k` largest rows by given column.
///
/// For single column, use [`Expr::top_k`].
#[cfg(feature = "top_k")]
pub fn top_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    k: K,
    by: E,
    descending: Vec<bool>,
) -> Self {
    self.map_n_ary(
        FunctionExpr::TopKBy { descending },
        [k.into()]
            .into_iter()
            .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
    )
}

/// Returns the `k` smallest elements.
///
/// This has time complexity `O(n + k log(n))`.
#[cfg(feature = "top_k")]
pub fn bottom_k(self, k: Expr) -> Self {
    self.map_binary(FunctionExpr::TopK { descending: true }, k)
}

/// Returns the `k` smallest rows by given column.
///
/// For single column, use [`Expr::bottom_k`].
#[cfg(feature = "top_k")]
pub fn bottom_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    k: K,
    by: E,
    descending: Vec<bool>,
) -> Self {
    // bottom-k is implemented as top-k with every sort direction inverted
    let descending = descending.into_iter().map(|x| !x).collect();
    self.map_n_ary(
        FunctionExpr::TopKBy { descending },
        [k.into()]
            .into_iter()
            .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
    )
}

/// Reverse column
pub fn reverse(self) -> Self {
    self.map_unary(FunctionExpr::Reverse)
}
439
440
/// Apply a function/closure once the logical plan get executed.
///
/// This function is very similar to [`Expr::apply`], but differs in how it handles aggregations.
///
/// * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
/// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
///
/// It is the responsibility of the caller that the schema is correct by giving
/// the correct output_type. If None given the output type of the input expr is used.
pub fn map<F, DT>(self, function: F, output_type: DT) -> Self
where
    F: Fn(Column) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &Field) -> PolarsResult<Field> + 'static + Send + Sync,
{
    self.map_with_fmt_str(function, output_type, "map")
}

/// Like [`Expr::map`], but with a custom display string used when formatting the expression.
pub fn map_with_fmt_str<F, DT>(
    self,
    function: F,
    output_type: DT,
    fmt_str: impl Into<PlSmallStr>,
) -> Self
where
    F: Fn(Column) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &Field) -> PolarsResult<Field> + 'static + Send + Sync,
{
    // adapt the single-column closures to the multi-column UDF interface
    let f = BaseColumnUdf::new(
        move |c: &mut [Column]| function(std::mem::take(&mut c[0])),
        move |schema: &Schema, fields: &[Field]| output_type(schema, &fields[0]),
    );

    let options =
        FunctionOptions::elementwise().with_flags(|f| f | FunctionFlags::OPTIONAL_RE_ENTRANT);
    let fmt_str = Box::new(fmt_str.into());
    Expr::AnonymousFunction {
        input: vec![self],
        function: new_column_udf(f),
        options,
        fmt_str,
    }
}

/// Build an anonymous aggregation UDF with a custom display string.
pub fn agg_with_fmt_str<F, DT>(
    self,
    function: F,
    output_type: DT,
    fmt_str: impl Into<PlSmallStr>,
) -> Self
where
    F: Fn(Column) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &Field) -> PolarsResult<Field> + 'static + Send + Sync,
{
    let f = BaseColumnUdf::new(
        move |c: &mut [Column]| function(std::mem::take(&mut c[0])),
        move |schema: &Schema, fields: &[Field]| output_type(schema, &fields[0]),
    );

    let options = FunctionOptions::aggregation();
    let fmt_str = Box::new(fmt_str.into());
    Expr::AnonymousFunction {
        input: vec![self],
        function: new_column_udf(f),
        options,
        fmt_str,
    }
}

/// Like [`Expr::apply`], but with a custom display string used when formatting the expression.
pub fn apply_with_fmt_str<F, DT>(
    self,
    function: F,
    output_type: DT,
    fmt_str: impl Into<PlSmallStr>,
) -> Self
where
    F: Fn(Column) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &Field) -> PolarsResult<Field> + 'static + Send + Sync,
{
    let f = BaseColumnUdf::new(
        move |c: &mut [Column]| function(std::mem::take(&mut c[0])),
        move |schema: &Schema, fields: &[Field]| output_type(schema, &fields[0]),
    );

    let options = FunctionOptions::groupwise();
    let fmt_str = Box::new(fmt_str.into());
    Expr::AnonymousFunction {
        input: vec![self],
        function: new_column_udf(f),
        options,
        fmt_str,
    }
}

/// Apply a function/closure once the logical plan get executed with many arguments.
///
/// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
pub fn map_many<F, DT>(self, function: F, arguments: &[Expr], output_type: DT) -> Self
where
    F: Fn(&mut [Column]) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &[Field]) -> PolarsResult<Field> + 'static + Send + Sync,
{
    // `self` is the first input column, followed by `arguments`
    let mut input = vec![self];
    input.extend_from_slice(arguments);

    let function = BaseColumnUdf::new(function, output_type);

    let options = FunctionOptions::elementwise();
    Expr::AnonymousFunction {
        input,
        function: new_column_udf(function),
        options,
        fmt_str: Box::new(PlSmallStr::EMPTY),
    }
}

/// Apply a function/closure over the groups. This should only be used in a group_by aggregation.
///
/// It is the responsibility of the caller that the schema is correct by giving
/// the correct output_type. If None given the output type of the input expr is used.
///
/// This difference with [map](Self::map) is that `apply` will create a separate `Series` per group.
///
/// * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
/// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
pub fn apply<F, DT>(self, function: F, output_type: DT) -> Self
where
    F: Fn(Column) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &Field) -> PolarsResult<Field> + 'static + Send + Sync,
{
    self.apply_with_fmt_str(function, output_type, PlSmallStr::EMPTY)
}

/// Apply a function/closure over the groups with many arguments. This should only be used in a group_by aggregation.
///
/// See the [`Expr::apply`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
pub fn apply_many<F, DT>(self, function: F, arguments: &[Expr], output_type: DT) -> Self
where
    F: Fn(&mut [Column]) -> PolarsResult<Column> + 'static + Send + Sync,
    DT: Fn(&Schema, &[Field]) -> PolarsResult<Field> + 'static + Send + Sync,
{
    let mut input = vec![self];
    input.extend_from_slice(arguments);

    let function = BaseColumnUdf::new(function, output_type);

    let options = FunctionOptions::groupwise();
    Expr::AnonymousFunction {
        input,
        function: new_column_udf(function),
        options,
        fmt_str: Box::new(PlSmallStr::EMPTY),
    }
}
593
594
/// Get mask of finite values if dtype is Float.
#[allow(clippy::wrong_self_convention)]
pub fn is_finite(self) -> Self {
    self.map_unary(BooleanFunction::IsFinite)
}

/// Get mask of infinite values if dtype is Float.
#[allow(clippy::wrong_self_convention)]
pub fn is_infinite(self) -> Self {
    self.map_unary(BooleanFunction::IsInfinite)
}

/// Get mask of NaN values if dtype is Float.
pub fn is_nan(self) -> Self {
    self.map_unary(BooleanFunction::IsNan)
}

/// Get inverse mask of NaN values if dtype is Float.
pub fn is_not_nan(self) -> Self {
    self.map_unary(BooleanFunction::IsNotNan)
}

/// Shift the values in the array by some period. See [the eager implementation](polars_core::series::SeriesTrait::shift).
pub fn shift(self, n: Expr) -> Self {
    self.map_binary(FunctionExpr::Shift, n)
}

/// Shift the values in the array by some period and fill the resulting empty values.
pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(self, n: E, fill_value: IE) -> Self {
    self.map_ternary(FunctionExpr::ShiftAndFill, n.into(), fill_value.into())
}
625
626
/// Run `evaluation` over cumulatively growing windows of this expression.
///
/// `min_samples` is the minimum number of values a window must contain before
/// evaluation (see [`EvalVariant::Cumulative`]).
/// NOTE(review): the previous doc comment ("Cumulatively count values from 0 to len")
/// was copy-pasted from `cum_count` and did not describe this method.
#[cfg(feature = "cum_agg")]
pub fn cumulative_eval(self, evaluation: Expr, min_samples: usize) -> Self {
    Expr::Eval {
        expr: Arc::new(self),
        evaluation: Arc::new(evaluation),
        variant: EvalVariant::Cumulative { min_samples },
    }
}

/// Cumulatively count values from 0 to len.
#[cfg(feature = "cum_agg")]
pub fn cum_count(self, reverse: bool) -> Self {
    self.map_unary(FunctionExpr::CumCount { reverse })
}

/// Get an array with the cumulative sum computed at every element.
#[cfg(feature = "cum_agg")]
pub fn cum_sum(self, reverse: bool) -> Self {
    self.map_unary(FunctionExpr::CumSum { reverse })
}

/// Get an array with the cumulative product computed at every element.
#[cfg(feature = "cum_agg")]
pub fn cum_prod(self, reverse: bool) -> Self {
    self.map_unary(FunctionExpr::CumProd { reverse })
}

/// Get an array with the cumulative min computed at every element.
#[cfg(feature = "cum_agg")]
pub fn cum_min(self, reverse: bool) -> Self {
    self.map_unary(FunctionExpr::CumMin { reverse })
}

/// Get an array with the cumulative max computed at every element.
#[cfg(feature = "cum_agg")]
pub fn cum_max(self, reverse: bool) -> Self {
    self.map_unary(FunctionExpr::CumMax { reverse })
}

/// Get the product aggregation of an expression.
pub fn product(self) -> Self {
    self.map_unary(FunctionExpr::Product)
}
670
671
/// Round underlying floating point array to given decimal numbers.
#[cfg(feature = "round_series")]
pub fn round(self, decimals: u32, mode: RoundMode) -> Self {
    self.map_unary(FunctionExpr::Round { decimals, mode })
}

/// Round to a number of significant figures.
#[cfg(feature = "round_series")]
pub fn round_sig_figs(self, digits: i32) -> Self {
    self.map_unary(FunctionExpr::RoundSF { digits })
}

/// Floor underlying floating point array to the largest integer smaller or equal to the float value.
#[cfg(feature = "round_series")]
pub fn floor(self) -> Self {
    self.map_unary(FunctionExpr::Floor)
}

/// Constant Pi
#[cfg(feature = "round_series")]
pub fn pi() -> Self {
    lit(std::f64::consts::PI)
}

/// Ceil underlying floating point array to the smallest integer larger or equal to the float value.
#[cfg(feature = "round_series")]
pub fn ceil(self) -> Self {
    self.map_unary(FunctionExpr::Ceil)
}

/// Clip underlying values to the `[min, max]` boundary.
#[cfg(feature = "round_series")]
pub fn clip(self, min: Expr, max: Expr) -> Self {
    self.map_ternary(
        FunctionExpr::Clip {
            has_min: true,
            has_max: true,
        },
        min,
        max,
    )
}

/// Clip underlying values to an upper boundary only.
#[cfg(feature = "round_series")]
pub fn clip_max(self, max: Expr) -> Self {
    self.map_binary(
        FunctionExpr::Clip {
            has_min: false,
            has_max: true,
        },
        max,
    )
}

/// Clip underlying values to a lower boundary only.
#[cfg(feature = "round_series")]
pub fn clip_min(self, min: Expr) -> Self {
    self.map_binary(
        FunctionExpr::Clip {
            has_min: true,
            has_max: false,
        },
        min,
    )
}

/// Convert all values to their absolute/positive value.
#[cfg(feature = "abs")]
pub fn abs(self) -> Self {
    self.map_unary(FunctionExpr::Abs)
}
743
744
/// Apply window function over a subgroup.
/// This is similar to a group_by + aggregation + self join.
/// Or similar to [window functions in Postgres](https://www.postgresql.org/docs/9.1/tutorial-window.html).
///
/// # Example
///
/// ``` rust
/// #[macro_use] extern crate polars_core;
/// use polars_core::prelude::*;
/// use polars_lazy::prelude::*;
///
/// fn example() -> PolarsResult<()> {
///     let df = df! {
///             "groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
///             "values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
///         }?;
///
///     let out = df
///          .lazy()
///          .select(&[
///              col("groups"),
///              sum("values").over([col("groups")]),
///          ])
///          .collect()?;
///     println!("{}", &out);
///     Ok(())
/// }
///
/// ```
///
/// Outputs:
///
/// ``` text
/// ╭────────┬────────╮
/// │ groups ┆ values │
/// │ ---    ┆ ---    │
/// │ i32    ┆ i32    │
/// ╞════════╪════════╡
/// │ 1      ┆ 16     │
/// │ 1      ┆ 16     │
/// │ 2      ┆ 13     │
/// │ 2      ┆ 13     │
/// │ …      ┆ …      │
/// │ 1      ┆ 16     │
/// │ 2      ┆ 13     │
/// │ 3      ┆ 15     │
/// │ 3      ┆ 15     │
/// │ 1      ┆ 16     │
/// ╰────────┴────────╯
/// ```
pub fn over<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(self, partition_by: E) -> Self {
    self.over_with_options(Some(partition_by), None, Default::default())
        .expect("We explicitly passed `partition_by`")
}

/// Like [`Expr::over`], but with an optional `order_by` and explicit [`WindowMapping`] options.
///
/// # Errors
/// Returns an error when both `partition_by` and `order_by` are `None`.
pub fn over_with_options<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    partition_by: Option<E>,
    order_by: Option<(E, SortOptions)>,
    options: WindowMapping,
) -> PolarsResult<Self> {
    polars_ensure!(partition_by.is_some() || order_by.is_some(), InvalidOperation: "At least one of `partition_by` and `order_by` must be specified in `over`");
    let partition_by = if let Some(partition_by) = partition_by {
        partition_by
            .as_ref()
            .iter()
            .map(|e| e.clone().into())
            .collect()
    } else {
        // no partitioning requested: a constant key puts everything in one partition
        vec![lit(1)]
    };

    let order_by = order_by.map(|(e, options)| {
        let e = e.as_ref();
        let e = if e.len() == 1 {
            Arc::new(e[0].clone().into())
        } else {
            // multiple order-by keys are packed into a single struct expression
            feature_gated!["dtype-struct", {
                let e = e.iter().map(|e| e.clone().into()).collect::<Vec<_>>();
                Arc::new(as_struct(e))
            }]
        };
        (e, options)
    });

    Ok(Expr::Window {
        function: Arc::new(self),
        partition_by,
        order_by,
        options: options.into(),
    })
}

#[cfg(feature = "dynamic_group_by")]
pub fn rolling(self, options: RollingGroupOptions) -> Self {
    // We add the index column as `partition expr` so that the optimizer will
    // not ignore it.
    let index_col = col(options.index_column.clone());
    Expr::Window {
        function: Arc::new(self),
        partition_by: vec![index_col],
        order_by: None,
        options: WindowType::Rolling(options),
    }
}
849
850
// Shared implementation for `fill_null`.
fn fill_null_impl(self, fill_value: Expr) -> Self {
    self.map_binary(FunctionExpr::FillNull, fill_value)
}

/// Replace the null values by a value.
pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> Self {
    self.fill_null_impl(fill_value.into())
}

/// Replace the null values by a fill strategy (e.g. forward/backward fill).
pub fn fill_null_with_strategy(self, strategy: FillNullStrategy) -> Self {
    self.map_unary(FunctionExpr::FillNullWithStrategy(strategy))
}

/// Replace the floating point `NaN` values by a value.
pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> Self {
    // we take the not branch so that self is truthy value of `when -> then -> otherwise`
    // and that ensure we keep the name of `self`
    when(self.clone().is_not_nan().or(self.clone().is_null()))
        .then(self)
        .otherwise(fill_value.into())
}

/// Count the values of the Series
/// or
/// Get counts of the group by operation.
///
/// Nulls are excluded; see [`Expr::len`] to include them.
pub fn count(self) -> Self {
    AggExpr::Count {
        input: Arc::new(self),
        include_nulls: false,
    }
    .into()
}

/// Get the length (number of rows, nulls included).
pub fn len(self) -> Self {
    AggExpr::Count {
        input: Arc::new(self),
        include_nulls: true,
    }
    .into()
}
890
891
/// Get a mask of duplicated values.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
pub fn is_duplicated(self) -> Self {
    self.map_unary(BooleanFunction::IsDuplicated)
}

/// Get a mask of values between `lower` and `upper`; `closed` controls bound inclusion.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_between")]
pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
    self.map_ternary(
        BooleanFunction::IsBetween { closed },
        lower.into(),
        upper.into(),
    )
}

/// Get a mask of unique values.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
pub fn is_unique(self) -> Self {
    self.map_unary(BooleanFunction::IsUnique)
}

/// Check whether floating point values are close to each other.
///
/// `abs_tol`/`rel_tol` are the absolute and relative tolerances; `nans_equal`
/// makes NaN compare equal to NaN.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_close")]
pub fn is_close<E: Into<Expr>>(
    self,
    expr: E,
    abs_tol: f64,
    rel_tol: f64,
    nans_equal: bool,
) -> Self {
    self.map_binary(
        BooleanFunction::IsClose {
            // TotalOrdWrap gives the f64 tolerances Eq/Hash semantics
            abs_tol: TotalOrdWrap(abs_tol),
            rel_tol: TotalOrdWrap(rel_tol),
            nans_equal,
        },
        expr.into(),
    )
}

/// Get the approximate count of unique values.
#[cfg(feature = "approx_unique")]
pub fn approx_n_unique(self) -> Self {
    self.map_unary(FunctionExpr::ApproxNUnique)
}
940
941
/// Bitwise "and" operation.
pub fn and<E: Into<Expr>>(self, expr: E) -> Self {
    binary_expr(self, Operator::And, expr.into())
}

/// Bitwise "xor" operation.
pub fn xor<E: Into<Expr>>(self, expr: E) -> Self {
    binary_expr(self, Operator::Xor, expr.into())
}

/// Bitwise "or" operation.
pub fn or<E: Into<Expr>>(self, expr: E) -> Self {
    binary_expr(self, Operator::Or, expr.into())
}

/// Logical "or" operation.
pub fn logical_or<E: Into<Expr>>(self, expr: E) -> Self {
    binary_expr(self, Operator::LogicalOr, expr.into())
}

/// Logical "and" operation.
pub fn logical_and<E: Into<Expr>>(self, expr: E) -> Self {
    binary_expr(self, Operator::LogicalAnd, expr.into())
}

/// Filter a single column.
///
/// Should be used in aggregation context. If you want to filter on a
/// DataFrame level, use `LazyFrame::filter`.
pub fn filter<E: Into<Expr>>(self, predicate: E) -> Self {
    Expr::Filter {
        input: Arc::new(self),
        by: Arc::new(predicate.into()),
    }
}

/// Check if the values of the left expression are in the lists of the right expr.
///
/// `nulls_equal` controls whether null compares equal to null for membership.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_in")]
pub fn is_in<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Self {
    let other = other.into();
    let function = BooleanFunction::IsIn { nulls_equal };
    let function = function.into();
    Expr::Function {
        input: vec![self, other],
        function,
    }
}
989
990
/// Sort this column by the ordering of another column evaluated from given expr.
/// Can also be used in a group_by context to sort the groups.
///
/// # Example
///
/// ```rust
/// # use polars_core::prelude::*;
/// # use polars_lazy::prelude::*;
/// # fn main() -> PolarsResult<()> {
/// let lf = df! {
///     "a" => [1, 2, 3, 4, 5],
///     "b" => [5, 4, 3, 2, 1]
/// }?.lazy();
///
/// let sorted = lf
///     .select(
///         vec![col("a").sort_by(col("b"), SortOptions::default())],
///     )
///     .collect()?;
///
/// assert_eq!(
///     sorted,
///     df! { "a" => [5, 4, 3, 2, 1] }?
/// );
/// # Ok(())
/// # }
/// ```
pub fn sort_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    by: E,
    sort_options: SortMultipleOptions,
) -> Expr {
    let by = by.as_ref().iter().map(|e| e.clone().into()).collect();
    Expr::SortBy {
        expr: Arc::new(self),
        by,
        sort_options,
    }
}

#[cfg(feature = "repeat_by")]
/// Repeat the column `n` times, where `n` is determined by the values in `by`.
/// This yields an `Expr` of dtype `List`.
pub fn repeat_by<E: Into<Expr>>(self, by: E) -> Expr {
    self.map_binary(FunctionExpr::RepeatBy, by.into())
}

#[cfg(feature = "is_first_distinct")]
#[allow(clippy::wrong_self_convention)]
/// Get a mask of the first unique value.
pub fn is_first_distinct(self) -> Expr {
    self.map_unary(BooleanFunction::IsFirstDistinct)
}

#[cfg(feature = "is_last_distinct")]
#[allow(clippy::wrong_self_convention)]
/// Get a mask of the last unique value.
pub fn is_last_distinct(self) -> Expr {
    self.map_unary(BooleanFunction::IsLastDistinct)
}

// Dot product as elementwise-multiply followed by sum.
fn dot_impl(self, other: Expr) -> Expr {
    (self * other).sum()
}

/// Compute the dot/inner product between two expressions.
pub fn dot<E: Into<Expr>>(self, other: E) -> Expr {
    self.dot_impl(other.into())
}
1058
1059
#[cfg(feature = "mode")]
1060
/// Compute the mode(s) of this column. This is the most occurring value.
1061
pub fn mode(self) -> Expr {
1062
self.map_unary(FunctionExpr::Mode)
1063
}
1064
1065
#[cfg(feature = "interpolate")]
1066
/// Interpolate intermediate values.
1067
/// Nulls at the beginning and end of the series remain null.
1068
pub fn interpolate(self, method: InterpolationMethod) -> Expr {
1069
self.map_unary(FunctionExpr::Interpolate(method))
1070
}
1071
1072
#[cfg(feature = "rolling_window_by")]
#[allow(clippy::type_complexity)]
/// Shared plumbing for the `rolling_*_by` methods: wraps `self` and `by`
/// into a `RollingExprBy` function expression.
fn finish_rolling_by(
    self,
    by: Expr,
    options: RollingOptionsDynamicWindow,
    rolling_function_by: RollingFunctionBy,
) -> Expr {
    self.map_binary(
        FunctionExpr::RollingExprBy {
            function_by: rolling_function_by,
            options,
        },
        by,
    )
}
#[cfg(feature = "interpolate_by")]
/// Interpolate intermediate values.
/// Nulls at the beginning and end of the series remain null.
/// The `by` column provides the x-coordinates for interpolation and must not contain nulls.
pub fn interpolate_by(self, by: Expr) -> Expr {
    self.map_binary(FunctionExpr::InterpolateBy, by)
}
#[cfg(feature = "rolling_window")]
#[allow(clippy::type_complexity)]
/// Shared plumbing for the fixed-window `rolling_*` methods: wraps `self`
/// into a `RollingExpr` function expression.
fn finish_rolling(
    self,
    options: RollingOptionsFixedWindow,
    rolling_function: RollingFunction,
) -> Expr {
    self.map_unary(FunctionExpr::RollingExpr {
        function: rolling_function,
        options,
    })
}
/// Apply a rolling minimum based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_min_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::MinBy)
}

/// Apply a rolling maximum based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_max_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::MaxBy)
}

/// Apply a rolling mean based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_mean_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::MeanBy)
}

/// Apply a rolling sum based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_sum_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::SumBy)
}

/// Apply a rolling quantile based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_quantile_by(
    self,
    by: Expr,
    method: QuantileMethod,
    quantile: f64,
    mut options: RollingOptionsDynamicWindow,
) -> Expr {
    use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};
    // The quantile probability and method travel inside the rolling options.
    options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
        prob: quantile,
        method,
    }));

    self.finish_rolling_by(by, options, RollingFunctionBy::QuantileBy)
}

/// Apply a rolling variance based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_var_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::VarBy)
}

/// Apply a rolling std-dev based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_std_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    self.finish_rolling_by(by, options, RollingFunctionBy::StdBy)
}

/// Apply a rolling median based on another column.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_median_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    // The median is the 0.5 quantile with linear interpolation.
    self.rolling_quantile_by(by, QuantileMethod::Linear, 0.5, options)
}
/// Apply a rolling minimum.
///
/// See: [`RollingAgg::rolling_min`]
#[cfg(feature = "rolling_window")]
pub fn rolling_min(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Min)
}

/// Apply a rolling maximum.
///
/// See: [`RollingAgg::rolling_max`]
#[cfg(feature = "rolling_window")]
pub fn rolling_max(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Max)
}

/// Apply a rolling mean.
///
/// See: [`RollingAgg::rolling_mean`]
#[cfg(feature = "rolling_window")]
pub fn rolling_mean(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Mean)
}

/// Apply a rolling sum.
///
/// See: [`RollingAgg::rolling_sum`]
#[cfg(feature = "rolling_window")]
pub fn rolling_sum(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Sum)
}

/// Apply a rolling median.
///
/// See: [`RollingAgg::rolling_median`]
#[cfg(feature = "rolling_window")]
pub fn rolling_median(self, options: RollingOptionsFixedWindow) -> Expr {
    // The median is the 0.5 quantile with linear interpolation.
    self.rolling_quantile(QuantileMethod::Linear, 0.5, options)
}

/// Apply a rolling quantile.
///
/// See: [`RollingAgg::rolling_quantile`]
#[cfg(feature = "rolling_window")]
pub fn rolling_quantile(
    self,
    method: QuantileMethod,
    quantile: f64,
    mut options: RollingOptionsFixedWindow,
) -> Expr {
    use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};

    // The quantile probability and method travel inside the rolling options.
    options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
        prob: quantile,
        method,
    }));

    self.finish_rolling(options, RollingFunction::Quantile)
}
/// Apply a rolling variance.
#[cfg(feature = "rolling_window")]
pub fn rolling_var(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Var)
}

/// Apply a rolling std-dev.
#[cfg(feature = "rolling_window")]
pub fn rolling_std(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Std)
}
/// Apply a rolling skew.
#[cfg(feature = "rolling_window")]
#[cfg(feature = "moment")]
pub fn rolling_skew(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Skew)
}

/// Apply a rolling kurtosis.
#[cfg(feature = "rolling_window")]
#[cfg(feature = "moment")]
pub fn rolling_kurtosis(self, options: RollingOptionsFixedWindow) -> Expr {
    self.finish_rolling(options, RollingFunction::Kurtosis)
}
#[cfg(feature = "rolling_window")]
/// Apply a custom function over a rolling/ moving window of the array.
/// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
pub fn rolling_map(
    self,
    f: PlanCallback<Series, Series>,
    options: RollingOptionsFixedWindow,
) -> Expr {
    self.finish_rolling(options, RollingFunction::Map(f))
}
#[cfg(feature = "peaks")]
/// Get a boolean mask of the local minima. (Semantics delegated to
/// `FunctionExpr::PeakMin` — see its implementation for edge handling.)
pub fn peak_min(self) -> Expr {
    self.map_unary(FunctionExpr::PeakMin)
}

#[cfg(feature = "peaks")]
/// Get a boolean mask of the local maxima. (Semantics delegated to
/// `FunctionExpr::PeakMax` — see its implementation for edge handling.)
pub fn peak_max(self) -> Expr {
    self.map_unary(FunctionExpr::PeakMax)
}
#[cfg(feature = "rank")]
/// Assign ranks to data, dealing with ties appropriately.
/// `seed` is used for tie-breaking when the rank method is randomized.
pub fn rank(self, options: RankOptions, seed: Option<u64>) -> Expr {
    self.map_unary(FunctionExpr::Rank { options, seed })
}
#[cfg(feature = "replace")]
/// Replace the given values with other values.
pub fn replace<E: Into<Expr>>(self, old: E, new: E) -> Expr {
    let old = old.into();
    let new = new.into();
    self.map_n_ary(FunctionExpr::Replace, [old, new])
}
#[cfg(feature = "replace")]
/// Replace the given values with other values.
///
/// `default` is an optional fallback expression; `return_dtype` optionally
/// forces the output data type.
pub fn replace_strict<E: Into<Expr>>(
    self,
    old: E,
    new: E,
    default: Option<E>,
    return_dtype: Option<impl Into<DataTypeExpr>>,
) -> Expr {
    let old = old.into();
    let new = new.into();
    // The optional default is passed as a trailing extra argument.
    let mut args = vec![old, new];
    args.extend(default.map(Into::into));
    self.map_n_ary(
        FunctionExpr::ReplaceStrict {
            return_dtype: return_dtype.map(Into::into),
        },
        args,
    )
}
#[cfg(feature = "cutqcut")]
/// Bin continuous values into discrete categories.
pub fn cut(
    self,
    breaks: Vec<f64>,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    include_breaks: bool,
) -> Expr {
    self.map_unary(FunctionExpr::Cut {
        breaks,
        labels: labels.map(|x| x.into_vec()),
        left_closed,
        include_breaks,
    })
}

#[cfg(feature = "cutqcut")]
/// Bin continuous values into discrete categories based on their quantiles.
pub fn qcut(
    self,
    probs: Vec<f64>,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    allow_duplicates: bool,
    include_breaks: bool,
) -> Expr {
    self.map_unary(FunctionExpr::QCut {
        probs,
        labels: labels.map(|x| x.into_vec()),
        left_closed,
        allow_duplicates,
        include_breaks,
    })
}

#[cfg(feature = "cutqcut")]
/// Bin continuous values into discrete categories using uniform quantile probabilities.
pub fn qcut_uniform(
    self,
    n_bins: usize,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    allow_duplicates: bool,
    include_breaks: bool,
) -> Expr {
    // Evenly spaced interior probabilities: 1/n, 2/n, ..., (n-1)/n.
    let probs = (1..n_bins).map(|b| b as f64 / n_bins as f64).collect();
    self.map_unary(FunctionExpr::QCut {
        probs,
        labels: labels.map(|x| x.into_vec()),
        left_closed,
        allow_duplicates,
        include_breaks,
    })
}
#[cfg(feature = "rle")]
/// Get the lengths of runs of identical values.
pub fn rle(self) -> Expr {
    self.map_unary(FunctionExpr::RLE)
}

#[cfg(feature = "rle")]
/// Similar to `rle`, but maps values to run IDs.
pub fn rle_id(self) -> Expr {
    self.map_unary(FunctionExpr::RLEID)
}
#[cfg(feature = "diff")]
/// Calculate the n-th discrete difference between values.
pub fn diff(self, n: Expr, null_behavior: NullBehavior) -> Expr {
    self.map_binary(FunctionExpr::Diff(null_behavior), n)
}

#[cfg(feature = "pct_change")]
/// Computes percentage change between values.
pub fn pct_change(self, n: Expr) -> Expr {
    self.map_binary(FunctionExpr::PctChange, n)
}
#[cfg(feature = "moment")]
/// Compute the sample skewness of a data set.
///
/// For normally distributed data, the skewness should be about zero. For
/// uni-modal continuous distributions, a skewness value greater than zero means
/// that there is more weight in the right tail of the distribution. The
/// function `skewtest` can be used to determine if the skewness value
/// is close enough to zero, statistically speaking.
///
/// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024)
pub fn skew(self, bias: bool) -> Expr {
    self.map_unary(FunctionExpr::Skew(bias))
}

#[cfg(feature = "moment")]
/// Compute the kurtosis (Fisher or Pearson).
///
/// Kurtosis is the fourth central moment divided by the square of the
/// variance. If Fisher's definition is used, then 3.0 is subtracted from
/// the result to give 0.0 for a normal distribution.
/// If bias is False then the kurtosis is calculated using k statistics to
/// eliminate bias coming from biased moment estimators.
pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr {
    self.map_unary(FunctionExpr::Kurtosis(fisher, bias))
}
/// Get maximal value that could be hold by this dtype.
1419
pub fn upper_bound(self) -> Expr {
1420
self.map_unary(FunctionExpr::UpperBound)
1421
}
1422
1423
/// Get minimal value that could be hold by this dtype.
1424
pub fn lower_bound(self) -> Expr {
1425
self.map_unary(FunctionExpr::LowerBound)
1426
}
1427
1428
#[cfg(feature = "dtype-array")]
/// Reshape this expression to an array with the given `dimensions`.
pub fn reshape(self, dimensions: &[i64]) -> Self {
    let dimensions = dimensions
        .iter()
        .map(|&v| ReshapeDimension::new(v))
        .collect();
    self.map_unary(FunctionExpr::Reshape(dimensions))
}
#[cfg(feature = "ewma")]
/// Calculate the exponentially-weighted moving average.
pub fn ewm_mean(self, options: EWMOptions) -> Self {
    self.map_unary(FunctionExpr::EwmMean { options })
}

#[cfg(feature = "ewma_by")]
/// Calculate the exponentially-weighted moving average by a time column.
pub fn ewm_mean_by(self, times: Expr, half_life: Duration) -> Self {
    self.map_binary(FunctionExpr::EwmMeanBy { half_life }, times)
}

#[cfg(feature = "ewma")]
/// Calculate the exponentially-weighted moving standard deviation.
pub fn ewm_std(self, options: EWMOptions) -> Self {
    self.map_unary(FunctionExpr::EwmStd { options })
}

#[cfg(feature = "ewma")]
/// Calculate the exponentially-weighted moving variance.
pub fn ewm_var(self, options: EWMOptions) -> Self {
    self.map_unary(FunctionExpr::EwmVar { options })
}
/// Returns whether any of the values in the column are `true`.
1462
///
1463
/// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1464
/// if the column contains any null values and no `true` values, the output
1465
/// is null.
1466
///
1467
/// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1468
pub fn any(self, ignore_nulls: bool) -> Self {
1469
self.map_unary(BooleanFunction::Any { ignore_nulls })
1470
}
1471
1472
/// Returns whether all values in the column are `true`.
1473
///
1474
/// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1475
/// if the column contains any null values and no `false` values, the output
1476
/// is null.
1477
///
1478
/// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1479
pub fn all(self, ignore_nulls: bool) -> Self {
1480
self.map_unary(BooleanFunction::All { ignore_nulls })
1481
}
1482
1483
#[cfg(feature = "dtype-struct")]
/// Count all unique values and create a struct mapping value to count.
/// (Note that it is better to turn parallel off in the aggregation context).
/// The name of the struct field with the counts is given by the parameter `name`.
pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self {
    self.map_unary(FunctionExpr::ValueCounts {
        sort,
        parallel,
        name: name.into(),
        normalize,
    })
}

#[cfg(feature = "unique_counts")]
/// Returns a count of the unique values in the order of appearance.
/// This method differs from [`Expr::value_counts`] in that it does not return the
/// values, only the counts and might be faster.
pub fn unique_counts(self) -> Self {
    self.map_unary(FunctionExpr::UniqueCounts)
}
#[cfg(feature = "log")]
/// Compute the logarithm to a given base.
pub fn log(self, base: Expr) -> Self {
    self.map_binary(FunctionExpr::Log, base)
}

#[cfg(feature = "log")]
/// Compute the natural logarithm of all elements plus one in the input array.
pub fn log1p(self) -> Self {
    self.map_unary(FunctionExpr::Log1p)
}

#[cfg(feature = "log")]
/// Calculate the exponential of all elements in the input array.
pub fn exp(self) -> Self {
    self.map_unary(FunctionExpr::Exp)
}

#[cfg(feature = "log")]
/// Compute the entropy as `-sum(pk * log(pk))`.
/// where `pk` are discrete probabilities.
pub fn entropy(self, base: f64, normalize: bool) -> Self {
    self.map_unary(FunctionExpr::Entropy { base, normalize })
}
/// Get the null count of the column/group.
1529
pub fn null_count(self) -> Expr {
1530
self.map_unary(FunctionExpr::NullCount)
1531
}
1532
1533
/// Set this `Series` as `sorted` so that downstream code can use
1534
/// fast paths for sorted arrays.
1535
/// # Warning
1536
/// This can lead to incorrect results if this `Series` is not sorted!!
1537
/// Use with care!
1538
pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
1539
// This is `map`. If a column is sorted. Chunks of that column are also sorted.
1540
self.map_unary(FunctionExpr::SetSortedFlag(sorted))
1541
}
1542
1543
#[cfg(feature = "row_hash")]
/// Compute the hash of every element, seeded by `k0`..`k3`.
pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr {
    self.map_unary(FunctionExpr::Hash(k0, k1, k2, k3))
}
pub fn to_physical(self) -> Expr {
1550
self.map_unary(FunctionExpr::ToPhysical)
1551
}
1552
1553
pub fn gather_every(self, n: usize, offset: usize) -> Expr {
1554
self.map_unary(FunctionExpr::GatherEvery { n, offset })
1555
}
1556
1557
#[cfg(feature = "reinterpret")]
1558
pub fn reinterpret(self, signed: bool) -> Expr {
1559
self.map_unary(FunctionExpr::Reinterpret(signed))
1560
}
1561
1562
pub fn extend_constant(self, value: Expr, n: Expr) -> Expr {
1563
self.map_ternary(FunctionExpr::ExtendConstant, value, n)
1564
}
1565
1566
#[cfg(feature = "strings")]
/// Get the [`string::StringNameSpace`]
pub fn str(self) -> string::StringNameSpace {
    string::StringNameSpace(self)
}

/// Get the [`binary::BinaryNameSpace`]
pub fn binary(self) -> binary::BinaryNameSpace {
    binary::BinaryNameSpace(self)
}

#[cfg(feature = "temporal")]
/// Get the [`dt::DateLikeNameSpace`]
pub fn dt(self) -> dt::DateLikeNameSpace {
    dt::DateLikeNameSpace(self)
}

/// Get the [`list::ListNameSpace`]
pub fn list(self) -> list::ListNameSpace {
    list::ListNameSpace(self)
}

/// Get the [`name::ExprNameNameSpace`]
pub fn name(self) -> name::ExprNameNameSpace {
    name::ExprNameNameSpace(self)
}

/// Get the [`array::ArrayNameSpace`].
#[cfg(feature = "dtype-array")]
pub fn arr(self) -> array::ArrayNameSpace {
    array::ArrayNameSpace(self)
}

/// Get the [`CategoricalNameSpace`].
#[cfg(feature = "dtype-categorical")]
pub fn cat(self) -> cat::CategoricalNameSpace {
    cat::CategoricalNameSpace(self)
}

/// Get the [`struct_::StructNameSpace`].
#[cfg(feature = "dtype-struct")]
pub fn struct_(self) -> struct_::StructNameSpace {
    struct_::StructNameSpace(self)
}

/// Get the [`meta::MetaNameSpace`]
#[cfg(feature = "meta")]
pub fn meta(self) -> meta::MetaNameSpace {
    meta::MetaNameSpace(self)
}
}
/// Apply a function/closure over multiple columns once the logical plan get executed.
1619
///
1620
/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations.
1621
///
1622
/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1623
/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1624
///
1625
/// It is the responsibility of the caller that the schema is correct by giving
1626
/// the correct output_type. If None given the output type of the input expr is used.
1627
pub fn map_multiple<F, DT, E>(function: F, expr: E, output_type: DT) -> Expr
1628
where
1629
F: Fn(&mut [Column]) -> PolarsResult<Column> + 'static + Send + Sync,
1630
DT: Fn(&Schema, &[Field]) -> PolarsResult<Field> + 'static + Send + Sync,
1631
E: AsRef<[Expr]>,
1632
{
1633
let input = expr.as_ref().to_vec();
1634
1635
let function = BaseColumnUdf::new(function, output_type);
1636
1637
let options = FunctionOptions::elementwise();
1638
Expr::AnonymousFunction {
1639
input,
1640
function: new_column_udf(function),
1641
options,
1642
fmt_str: Box::new(PlSmallStr::EMPTY),
1643
}
1644
}
1645
1646
/// Apply a function/closure over the groups of multiple columns. This should only be used in a group_by aggregation.
1647
///
1648
/// It is the responsibility of the caller that the schema is correct by giving
1649
/// the correct output_type. If None given the output type of the input expr is used.
1650
///
1651
/// This difference with [`map_multiple`] is that [`apply_multiple`] will create a separate [`Series`] per group.
1652
///
1653
/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1654
/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1655
pub fn apply_multiple<F, DT, E>(function: F, expr: E, output_type: DT, returns_scalar: bool) -> Expr
1656
where
1657
F: Fn(&mut [Column]) -> PolarsResult<Column> + 'static + Send + Sync,
1658
DT: Fn(&Schema, &[Field]) -> PolarsResult<Field> + 'static + Send + Sync,
1659
E: AsRef<[Expr]>,
1660
{
1661
let input = expr.as_ref().to_vec();
1662
let options = FunctionOptions::groupwise().with_flags(|mut f| {
1663
f.set(FunctionFlags::RETURNS_SCALAR, returns_scalar);
1664
f
1665
});
1666
let function = BaseColumnUdf::new(function, output_type);
1667
Expr::AnonymousFunction {
1668
input,
1669
function: new_column_udf(function),
1670
options,
1671
fmt_str: Box::new(PlSmallStr::EMPTY),
1672
}
1673
}
1674
1675
/// Return the number of rows in the context.
1676
pub fn len() -> Expr {
1677
Expr::Len
1678
}
1679
1680
/// First column in a DataFrame.
1681
pub fn first() -> Selector {
1682
nth(0)
1683
}
1684
1685
/// Last column in a DataFrame.
1686
pub fn last() -> Selector {
1687
nth(-1)
1688
}
1689
1690
/// Nth column in a DataFrame.
1691
pub fn nth(n: i64) -> Selector {
1692
Selector::ByIndex {
1693
indices: [n].into(),
1694
strict: true,
1695
}
1696
}
1697
1698