#[cfg(feature = "diff")]
use polars_core::series::ops::NullBehavior;
use super::*;
/// A literal column added via `with_column` must show up in the collected
/// frame under its alias, making the frame six columns wide.
#[test]
fn test_lazy_with_column() {
    let query = get_df().lazy().with_column(lit(10).alias("foo"));
    let result = query.collect().unwrap();

    assert_eq!(result.width(), 6);
    assert!(result.column("foo").is_ok());
}
/// Executes two lazy queries on the shared test frame: a select + sort that
/// only has to run, and a negated filter whose output values are checked.
#[test]
fn test_lazy_exec() {
    let df = get_df();

    // The result itself is not inspected, but the query must succeed:
    // unwrap so an execution error fails the test instead of being dropped.
    let _sorted = df
        .clone()
        .lazy()
        .select([col("sepal_width"), col("variety")])
        .sort(["sepal_width"], Default::default())
        .collect()
        .unwrap();

    // NOT(sepal_width < 3.5) must leave only values greater than 3.4.
    let filtered = df
        .lazy()
        .filter(not(col("sepal_width").lt(lit(3.5))))
        .collect()
        .unwrap();
    let check = filtered
        .column("sepal_width")
        .unwrap()
        .f64()
        .unwrap()
        .gt(3.4);
    assert!(check.all());
}
/// Selecting the same column once aliased and once plain yields both names,
/// in projection order.
#[test]
fn test_lazy_alias() {
    let projection = [col("sepal_width").alias("petals"), col("sepal_width")];
    let renamed = get_df().lazy().select(projection).collect().unwrap();

    assert_eq!(renamed.get_column_names(), &["petals", "sepal_width"]);
}
/// Unpivots the two sepal columns, keeps only the `sepal_length` rows and
/// checks the shape of the projected result.
#[test]
#[cfg(feature = "pivot")]
fn test_lazy_unpivot() {
    let unpivot_args = UnpivotArgsDSL {
        on: by_name(["sepal_length", "sepal_width"], true),
        index: by_name(["petal_width", "petal_length"], true),
        variable_name: None,
        value_name: None,
    };
    let only_sepal_length = col("variable").eq(lit("sepal_length"));

    let long = get_df()
        .lazy()
        .unpivot(unpivot_args)
        .filter(only_sepal_length)
        .select([col("variable"), col("petal_width"), col("value")])
        .collect()
        .unwrap();

    assert_eq!(long.shape(), (7, 3));
}
/// `drop_nulls(None)` must keep only the row in which no column is null.
#[test]
fn test_lazy_drop_nulls() {
    let input = df! {
        "foo" => &[Some(1), None, Some(3)],
        "bar" => &[Some(1), Some(2), None]
    }
    .unwrap();
    let expected = df! {
        "foo" => &[Some(1)],
        "bar" => &[Some(1)]
    }
    .unwrap();

    let cleaned = input.lazy().drop_nulls(None).collect().unwrap();
    assert!(cleaned.equals(&expected));
}
/// Applies a user-defined `map` that multiplies `sepal_width` by 200 and
/// checks the first resulting value.
#[test]
fn test_lazy_udf() {
    let scaled = col("sepal_width").map(|s| Ok(s * 200.0), |_, f| Ok(f.clone()));
    let result = get_df().lazy().select([scaled]).collect().unwrap();

    let first = result.column("sepal_width").unwrap().f64().unwrap().get(0);
    assert_eq!(first, Some(700.0));
}
/// Filters on `is_null` / `is_not_null` and then runs a `min` aggregation
/// per `variety`, checking row counts and output shape.
#[test]
fn test_lazy_is_null() {
    let df = get_df();

    // No row has a null `sepal_width`.
    let null_rows = df
        .clone()
        .lazy()
        .filter(col("sepal_width").is_null())
        .collect()
        .unwrap();
    assert_eq!(null_rows.height(), 0);

    // ... so the complementary filter keeps every row.
    let non_null_rows = df
        .clone()
        .lazy()
        .filter(col("sepal_width").is_not_null())
        .collect()
        .unwrap();
    assert_eq!(non_null_rows.height(), df.height());

    let grouped = df
        .lazy()
        .group_by([col("variety")])
        .agg([col("sepal_width").min()])
        .collect()
        .unwrap();
    assert_eq!(grouped.shape(), (1, 2));
}
/// Two stacked projections after a group-by aggregation must still resolve
/// the aliased aggregate and produce a single `bar` column.
#[test]
fn test_lazy_pushdown_through_agg() {
    let aggregations = [
        col("sepal_length").min(),
        col("petal_length").min().alias("foo"),
    ];

    let result = get_df()
        .lazy()
        .group_by([col("variety")])
        .agg(aggregations)
        .select([col("foo")])
        .select([col("foo").alias("bar")])
        .collect()
        .unwrap();

    assert_eq!(result.shape(), (1, 1));
    let first_bar = result.column("bar").unwrap().get(0).unwrap();
    assert_eq!(first_bar, AnyValue::Float64(1.3));
}
/// Shifting by two positions leaves a null in the first slot.
#[test]
fn test_lazy_shift() {
    let shifted = col("sepal_width").alias("foo").shift(lit(2));
    let result = get_df().lazy().select([shifted]).collect().unwrap();

    let head = result.column("foo").unwrap().f64().unwrap().get(0);
    assert_eq!(head, None);
}
/// A shift by -1 with fill value 5 moves values up and fills the tail.
#[test]
fn test_shift_and_fill() -> PolarsResult<()> {
    let frame = df![
        "a" => [1, 2, 3]
    ]?;

    let result = frame
        .lazy()
        .select([col("a").shift_and_fill(lit(-1), lit(5))])
        .collect()?;

    let shifted = result.column("a")?;
    assert_eq!(Vec::from(shifted.i32()?), &[Some(2), Some(3), Some(5)]);
    Ok(())
}
/// `shift_and_fill` on a boolean column: shift by one, fill the head with
/// `true`.
#[test]
fn test_shift_and_fill_non_numeric() -> PolarsResult<()> {
    let frame = df![
        "bool" => [true, false, true],
    ]?;

    let result = frame
        .lazy()
        .select([col("bool").shift_and_fill(1, true)])
        .collect()?;

    let shifted = result.column("bool")?;
    assert_eq!(
        Vec::from(shifted.bool()?),
        &[Some(true), Some(true), Some(false)]
    );
    Ok(())
}
/// Filters on a column produced by `with_column`, first for a plain literal
/// and then for a `when/then/otherwise` expression.
#[test]
fn test_lazy_ternary_and_predicates() {
    let df = get_df();

    // Literal column followed by a filter on it: only has to execute.
    let literal_query = df
        .clone()
        .lazy()
        .with_column(lit(3).alias("foo"))
        .filter(col("foo").is_not_null());
    let _collected = literal_query.collect().unwrap();

    // Ternary column followed by a filter on its value.
    let foo = when(col("sepal_length").lt(lit(5.0)))
        .then(lit(3))
        .otherwise(col("sepal_width"))
        .alias("foo");
    let ternary_query = df
        .lazy()
        .with_column(foo)
        .filter(col("foo").gt(lit(3.0)));
    let result = ternary_query.collect().unwrap();

    let sepal_length = result.column("sepal_length").unwrap();
    assert_eq!(
        sepal_length,
        &Column::new("sepal_length".into(), &[5.1f64, 5.0, 5.4])
    );
    assert_eq!(result.shape(), (3, 6));
}
/// An equality comparison against a literal yields a boolean column; summing
/// it counts the single matching row.
#[test]
fn test_lazy_binary_ops() {
    let frame = df!("a" => &[1, 2, 3, 4, 5, ]).unwrap();
    let result = frame
        .lazy()
        .select([col("a").eq(lit(2)).alias("foo")])
        .collect()
        .unwrap();

    let hit_count = result
        .column("foo")
        .unwrap()
        .as_materialized_series()
        .sum::<i32>()
        .unwrap();
    assert_eq!(hit_count, 1);
}
/// Doubles column `a` with a mapped UDF, then filters on the doubled values;
/// nothing survives the filter, leaving an empty two-column frame.
#[test]
fn test_lazy_query_2() {
    let doubled = col("a").map(|s| Ok(s * 2), |_, f| Ok(f.clone()));
    let query = load_df()
        .lazy()
        .with_column(doubled)
        .filter(col("a").lt(lit(2)))
        .select([col("b"), col("a")]);

    let result = query.collect().unwrap();
    assert_eq!(result.shape(), (0, 2));
}
/// Smoke test: a group-by `max` aggregation over the scanned foods CSV must
/// execute without error.
#[test]
#[cfg(feature = "csv")]
fn test_lazy_query_3() {
    let query = scan_foods_csv()
        .group_by([col("calories")])
        .agg([col("fats_g").max()]);
    let _ = query.collect().unwrap();
}
/// Computes per-`uid` differences of `cumcases` with an `apply` UDF inside a
/// group-by, explodes the lists and joins the result back onto the base frame.
#[test]
fn test_lazy_query_4() -> PolarsResult<()> {
    let base_df = df! {
        "uid" => [0, 0, 0, 1, 1, 1],
        "day" => [1, 2, 3, 1, 2, 3],
        "cumcases" => [10, 12, 15, 25, 30, 41]
    }
    .unwrap()
    .lazy();

    // Difference between each value and its predecessor within the group.
    let diff_cases = col("cumcases")
        .apply(|s: Column| &s - &(s.shift(1)), |_, f| Ok(f.clone()))
        .alias("diff_cases");

    let joined = base_df
        .clone()
        .group_by([col("uid")])
        .agg([col("day").alias("day"), diff_cases])
        .explode(by_name(["day", "diff_cases"], true))
        .join(
            base_df,
            [col("uid"), col("day")],
            [col("uid"), col("day")],
            JoinType::Inner.into(),
        )
        .collect()
        .unwrap();

    let diffs = joined.column("diff_cases").unwrap().i32().unwrap();
    assert_eq!(
        Vec::from(diffs),
        &[None, Some(2), Some(3), None, Some(5), Some(11)]
    );
    Ok(())
}
/// `head(2)` inside a group-by aggregation must yield a two-element list for
/// every group.
///
/// Both `uid` groups hold three rows, so each aggregated list is expected to
/// have length 2 regardless of the order in which the groups are emitted.
#[test]
fn test_lazy_query_5() {
    let df = df! {
        "uid" => [0, 0, 0, 1, 1, 1],
        "day" => [1, 2, 4, 1, 2, 3],
        "cumcases" => [10, 12, 15, 25, 30, 41]
    }
    .unwrap();

    let out = df
        .lazy()
        .group_by([col("uid")])
        .agg([col("day").head(Some(2))])
        .collect()
        .unwrap();

    // Column 1 is the aggregated `day` list column. Inspect the list of the
    // first AND the second group; previously index 0 was checked twice.
    let day_lists = out.select_at_idx(1).unwrap().list().unwrap();
    for group_idx in 0..2 {
        let s = day_lists.get_as_series(group_idx).unwrap();
        assert_eq!(s.len(), 2);
    }
}
/// Builds one `when(is_in).then.otherwise` expression per column, selects
/// them all and filters on one of the outputs.
#[test]
#[cfg(feature = "is_in")]
fn test_lazy_query_8() -> PolarsResult<()> {
    let frame = df![
        "A" => [1, 2, 3],
        "B" => [1, 2, 3],
        "C" => [1, 2, 3],
        "D" => [1, 2, 3],
        "E" => [1, 2, 3]
    ]?;

    let selection: Vec<_> = ["A", "B", "C", "D", "E"]
        .iter()
        .map(|&name| {
            when(col(name).is_in(col("E"), false))
                .then(col("A"))
                .otherwise(Null {}.lit())
                .alias(name)
        })
        .collect();

    let result = frame
        .lazy()
        .select(selection)
        .filter(col("D").gt(lit(1)))
        .collect()?;

    assert_eq!(result.shape(), (2, 5));
    Ok(())
}
/// Joins sales onto cities by city name, sums the sales amount per country
/// and checks the sorted totals.
#[test]
fn test_lazy_query_9() -> PolarsResult<()> {
    let cities = df![
        "Cities.City"=> ["Moscow", "Berlin", "Paris","Hamburg", "Lyon", "Novosibirsk"],
        "Cities.Population"=> [11.92, 3.645, 2.161, 1.841, 0.513, 1.511],
        "Cities.Country"=> ["Russia", "Germany", "France", "Germany", "France", "Russia"]
    ]?;
    let sales = df![
        "Sales.City"=> ["Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris"],
        "Sales.Item"=> ["Item A", "Item A","Item A",
                       "Item B", "Item B","Item B",
                       "Item C", "Item C","Item C"],
        "Sales.Amount"=> [200, 180, 100,
                        3, 30, 20,
                        90, 130, 125]
    ]?;

    let sales_with_city = sales.lazy().join(
        cities.lazy(),
        [col("Sales.City")],
        [col("Cities.City")],
        JoinType::Inner.into(),
    );
    let totals_per_country = sales_with_city
        .group_by([col("Cities.Country")])
        .agg([col("Sales.Amount").sum().alias("sum")])
        .sort(["sum"], Default::default())
        .collect()?;

    let totals: Vec<_> = totals_per_country
        .column("sum")?
        .i32()?
        .into_no_null_iter()
        .collect();
    assert_eq!(totals, &[245, 293, 340]);
    Ok(())
}
/// Subtracting two datetime columns must give a duration column, both when
/// the operands share a time unit and when they differ (ms vs ns).
#[test]
#[cfg(all(
    feature = "temporal",
    feature = "dtype-datetime",
    feature = "dtype-date",
    feature = "dtype-duration"
))]
fn test_lazy_query_10() {
    use chrono::Duration as ChronoDuration;

    let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
    // Timestamp on `date` at the given full hour.
    let at_hour =
        |hour: u32| NaiveDateTime::new(date, NaiveTime::from_hms_opt(hour, 0, 0).unwrap());

    // Case 1: both operands in nanoseconds.
    let x = DatetimeChunked::from_naive_datetime(
        "x".into(),
        [at_hour(12), at_hour(13), at_hour(14)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let y = DatetimeChunked::from_naive_datetime(
        "y".into(),
        [at_hour(11), at_hour(11), at_hour(11)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let same_unit = DataFrame::new(vec![x, y])
        .unwrap()
        .lazy()
        .select(&[(col("x") - col("y")).alias("z")])
        .collect()
        .unwrap();

    let z = DurationChunked::from_duration(
        "z".into(),
        [
            ChronoDuration::try_hours(1).unwrap(),
            ChronoDuration::try_hours(2).unwrap(),
            ChronoDuration::try_hours(3).unwrap(),
        ],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    assert!(same_unit.column("z").unwrap().equals(&z));

    // Case 2: milliseconds minus nanoseconds; compared against `z` cast to
    // a millisecond duration.
    let x = DatetimeChunked::from_naive_datetime(
        "x".into(),
        [at_hour(2), at_hour(3), at_hour(4)],
        TimeUnit::Milliseconds,
    )
    .into_column();
    let y = DatetimeChunked::from_naive_datetime(
        "y".into(),
        [at_hour(1), at_hour(1), at_hour(1)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let mixed_unit = DataFrame::new(vec![x, y])
        .unwrap()
        .lazy()
        .select(&[(col("x") - col("y")).alias("z")])
        .collect()
        .unwrap();

    assert!(
        mixed_unit
            .column("z")
            .unwrap()
            .equals(&z.cast(&DataType::Duration(TimeUnit::Milliseconds)).unwrap())
    );
}
/// Chains two shifts over a float column, filters on a datetime column and
/// checks the sum of the surviving shifted values (expected 7, within 0.01).
#[test]
#[cfg(all(
    feature = "temporal",
    feature = "dtype-date",
    feature = "dtype-datetime"
))]
fn test_lazy_query_7() {
    let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
    // Timestamp on `date` at 12:<minute>:00.
    let at_minute =
        |minute: u32| NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, minute, 0).unwrap());

    let dates = [
        at_minute(0),
        at_minute(1),
        at_minute(2),
        at_minute(3),
        at_minute(4),
        at_minute(5),
    ];
    let data = vec![Some(1.), Some(2.), Some(3.), Some(4.), None, None];

    let frame = DataFrame::new(vec![
        DatetimeChunked::from_naive_datetime("date".into(), dates, TimeUnit::Nanoseconds)
            .into_column(),
        Column::new("data".into(), data),
    ])
    .unwrap();

    let cutoff = NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 2, 0).unwrap());
    let result = frame
        .lazy()
        .with_column(col("data").shift(lit(-1)).alias("output"))
        .with_column(col("output").shift(lit(2)).alias("shifted"))
        .filter(col("date").gt(lit(cutoff)))
        .collect()
        .unwrap();

    let deviation = result
        .column("shifted")
        .unwrap()
        .as_materialized_series()
        .sum::<f64>()
        .unwrap()
        - 7.0;
    assert!(deviation < 0.01 && deviation > -0.01);
}
/// A shift by one followed by `fill_null(0)` produces a zero in the first
/// slot.
#[test]
fn test_lazy_shift_and_fill_all() {
    let data = &[1, 2, 3];
    let frame = DataFrame::new(vec![Column::new("data".into(), data)]).unwrap();

    let shifted_filled = col("data").shift(lit(1)).fill_null(lit(0)).alias("output");
    let result = frame.lazy().with_column(shifted_filled).collect().unwrap();

    assert_eq!(
        Vec::from(result.column("output").unwrap().i32().unwrap()),
        vec![Some(0), Some(1), Some(2)]
    );
}
/// Smoke test: a shift inside `with_column`, with no filter in the plan,
/// must collect successfully.
#[test]
fn test_lazy_shift_operation_no_filter() {
    let frame = df! {
        "a" => &[1, 2, 3],
        "b" => &[1, 2, 3]
    }
    .unwrap();

    let query = frame
        .lazy()
        .with_column(col("b").shift(lit(1)).alias("output"));
    query.collect().unwrap();
}
// Checks expression simplification during lowering to IR: for the projection
// `1.0 + 1.0 + sepal_width`, the left operand of the resulting binary
// expression must be the single float literal 2.0.
#[test]
fn test_simplify_expr() {
let df = get_df();
// Only the logical plan is taken; the query is never collected.
let plan = df
.lazy()
.select(&[lit(1.0) + lit(1.0) + col("sepal_width")])
.logical_plan;
let mut expr_arena = Arena::new();
let mut lp_arena = Arena::new();
// `OptFlags::SIMPLIFY_EXPR` is borrowed mutably in place; the allowance below
// silences the `const_item_mutation` lint for that borrow.
#[allow(const_item_mutation)]
let lp_top = to_alp(
plan,
&mut expr_arena,
&mut lp_arena,
&mut OptFlags::SIMPLIFY_EXPR,
)
.unwrap();
// Root must be a select whose first expression is a binary expression with
// a dynamic float literal 2.0 on the left-hand side.
assert!(matches!(
lp_arena.get(lp_top),
IR::Select { expr, .. } if matches!(expr_arena.get(expr[0].node()), AExpr::BinaryExpr{ left, ..} if matches!(expr_arena.get(*left), &AExpr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(2.0)))))
));
}
/// The `*` wildcard must expand to every column, both in a plain select and
/// inside group-by aggregations with name suffixes.
#[test]
fn test_lazy_wildcard() {
    let df = load_df();

    let everything = df.clone().lazy().select([col("*")]).collect().unwrap();
    assert_eq!(everything.shape(), (5, 3));

    let aggregations = [
        col("*").sum().name().suffix(""),
        col("*").first().name().suffix("_first"),
    ];
    let grouped = df
        .lazy()
        .group_by([col("b")])
        .agg(aggregations)
        .collect()
        .unwrap();
    assert_eq!(grouped.shape(), (3, 5));
}
/// The lazy `reverse` must agree with the eager `DataFrame::reverse`.
#[test]
fn test_lazy_reverse() {
    let df = load_df();
    let lazily_reversed = df.clone().lazy().reverse().collect().unwrap();

    assert!(lazily_reversed.equals_missing(&df.reverse()));
}
/// Frame-level `fill_null` replaces nulls in every column and keeps the
/// column names unchanged.
#[test]
fn test_lazy_fill_null() {
    let input = df! {
        "a" => &[None, Some(2.0)],
        "b" => &[Some(1.0), None]
    }
    .unwrap();
    let expected = df! {
        "a" => &[Some(10.0), Some(2.0)],
        "b" => &[Some(1.0), Some(10.0)]
    }
    .unwrap();

    let filled = input.lazy().fill_null(lit(10.0)).collect().unwrap();

    assert!(filled.equals(&expected));
    assert_eq!(filled.get_column_names(), vec!["a", "b"]);
}
/// Smoke test: selecting an alias and then selecting that alias again must
/// collect successfully.
#[test]
fn test_lazy_double_projection() {
    let frame = df! {
        "foo" => &[1, 2, 3]
    }
    .unwrap();

    let query = frame
        .lazy()
        .select([col("foo").alias("bar")])
        .select([col("bar")]);
    query.collect().unwrap();
}
/// Multiplying an integer column by a float column must insert a cast on the
/// integer (left) operand while leaving the float (right) operand a plain
/// column reference.
///
/// The test panics if the lowered plan does not have the expected
/// select-of-binary-expression shape, so it cannot pass without running its
/// assertions.
#[test]
fn test_type_coercion() {
    let df = df! {
        "foo" => &[1, 2, 3],
        "bar" => &[1.0, 2.0, 3.0]
    }
    .unwrap();

    let lp = df.lazy().select([col("foo") * col("bar")]).logical_plan;

    let mut expr_arena = Arena::new();
    let mut lp_arena = Arena::new();
    let lp_top = to_alp(lp, &mut expr_arena, &mut lp_arena, &mut OptFlags::default()).unwrap();

    if let IR::Select { expr, .. } = lp_arena.get(lp_top) {
        if let AExpr::BinaryExpr { left, right, .. } = expr_arena.get(expr[0].node()) {
            assert!(matches!(expr_arena.get(*left), AExpr::Cast { .. }));
            assert!(matches!(expr_arena.get(*right), AExpr::Column { .. }));
        } else {
            panic!("expected the projected expression to be a binary expression")
        }
    } else {
        // Previously a non-select root skipped every assertion silently.
        panic!("expected the root of the lowered plan to be a select node")
    }
}
/// Group-by aggregations on an in-memory frame (`mean`) and on the scanned
/// foods CSV (implicit list aggregation), each checked after sorting by key.
#[test]
#[cfg(feature = "csv")]
fn test_lazy_partition_agg() {
    let frame = df! {
        "foo" => &[1, 1, 2, 2, 3],
        "bar" => &[1.0, 1.0, 2.0, 2.0, 3.0]
    }
    .unwrap();

    let means = frame
        .lazy()
        .group_by([col("foo")])
        .agg([col("bar").mean()])
        .sort(["foo"], Default::default())
        .collect()
        .unwrap();
    assert_eq!(
        Vec::from(means.column("bar").unwrap().f64().unwrap()),
        &[Some(1.0), Some(2.0), Some(3.0)]
    );

    let per_category = scan_foods_csv()
        .group_by([col("category")])
        .agg([col("calories")])
        .sort(["category"], Default::default())
        .collect()
        .unwrap();

    // Column 1 holds the aggregated calories lists; take the first category.
    let calories_lists = per_category.select_at_idx(1).unwrap();
    let first_category = calories_lists.list().unwrap().get_as_series(0).unwrap();
    let first_category_calories = first_category.i64().unwrap();
    assert_eq!(
        Vec::from(first_category_calories),
        &[
            Some(60),
            Some(30),
            Some(50),
            Some(30),
            Some(60),
            Some(130),
            Some(50),
        ]
    )
}
/// Smoke test: an `apply` UDF returning the group length as a `UInt32`
/// column must run inside a group-by aggregation.
#[test]
fn test_lazy_group_by_apply() {
    let group_len = col("cars").apply(
        |s: Column| Ok(Column::new("".into(), &[s.len() as u32])),
        |_, f| Ok(Field::new(f.name().clone(), DataType::UInt32)),
    );

    fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([group_len])
        .collect()
        .unwrap();
}
/// `shift_and_fill` with an aggregated fill expression must leave no nulls,
/// for positive and negative shifts and for the frame-level variant.
#[test]
fn test_lazy_shift_and_fill() {
    let df = df! {
        "A" => &[1, 2, 3, 4, 5],
        "B" => &[5, 4, 3, 2, 1]
    }
    .unwrap();

    // Expression-level, shifting forward.
    let forward = df
        .clone()
        .lazy()
        .with_column(col("A").shift_and_fill(lit(2), col("B").mean()))
        .collect()
        .unwrap();
    assert_eq!(forward.column("A").unwrap().null_count(), 0);

    // Expression-level, shifting backward.
    let backward = df
        .clone()
        .lazy()
        .with_column(col("A").shift_and_fill(lit(-2), col("B").mean()))
        .collect()
        .unwrap();
    assert_eq!(backward.column("A").unwrap().null_count(), 0);

    // Frame-level variant.
    let whole_frame = df
        .lazy()
        .shift_and_fill(lit(-1), col("B").std(1))
        .collect()
        .unwrap();
    assert_eq!(whole_frame.column("A").unwrap().null_count(), 0);
}
/// Group-by `mean` over a column containing a null; after sorting by the
/// mean, the smallest value must be 1.0.
#[test]
fn test_lazy_group_by() {
    let frame = df! {
        "a" => &[Some(1.0), None, Some(3.0), Some(4.0), Some(5.0)],
        "groups" => &["a", "a", "b", "c", "c"]
    }
    .unwrap();

    let means = frame
        .lazy()
        .group_by([col("groups")])
        .agg([col("a").mean()])
        .sort(["a"], Default::default())
        .collect()
        .unwrap();

    let smallest = means.column("a").unwrap().f64().unwrap().get(0);
    assert_eq!(smallest, Some(1.0));
}
/// Smoke test: `tail(3)` on a lazy frame must collect successfully.
#[test]
fn test_lazy_tail() {
    let frame = df! {
        "A" => &[1, 2, 3, 4, 5],
        "B" => &[5, 4, 3, 2, 1]
    }
    .unwrap();

    let query = frame.lazy().tail(3);
    let _out = query.collect().unwrap();
}
/// Sorting inside a group-by aggregation: `first` gives each group's
/// minimum, `last` gives each group's maximum.
#[test]
fn test_lazy_group_by_sort() {
    let df = df! {
        "a" => ["a", "b", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let firsts = df
        .clone()
        .lazy()
        .group_by([col("a")])
        .agg([col("b").sort(Default::default()).first()])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();
    assert_eq!(
        Vec::from(firsts.column("b").unwrap().i32().unwrap()),
        [Some(1), Some(2), Some(6)]
    );

    let lasts = df
        .lazy()
        .group_by([col("a")])
        .agg([col("b").sort(Default::default()).last()])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();
    assert_eq!(
        Vec::from(lasts.column("b").unwrap().i32().unwrap()),
        [Some(3), Some(5), Some(6)]
    );
}
/// Within each group, orders `b` by `c` descending and takes the first
/// element.
#[test]
fn test_lazy_group_by_sort_by() {
    let frame = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6],
        "c" => [6, 1, 4, 3, 2, 1]
    }
    .unwrap();

    let b_at_largest_c = col("b")
        .sort_by(
            [col("c")],
            SortMultipleOptions::default().with_order_descending(true),
        )
        .first();

    let result = frame
        .lazy()
        .group_by([col("a")])
        .agg([b_at_largest_c])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();

    assert_eq!(
        Vec::from(result.column("b").unwrap().i32().unwrap()),
        [Some(1), Some(4), Some(6)]
    );
}
/// Smoke test: casting an aggregated mean to a datetime inside a group-by
/// must collect successfully.
#[test]
#[cfg(feature = "dtype-datetime")]
fn test_lazy_group_by_cast() {
    let frame = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let mean_as_datetime = col("b")
        .mean()
        .cast(DataType::Datetime(TimeUnit::Nanoseconds, None));

    let _out = frame
        .lazy()
        .group_by([col("a")])
        .agg([mean_as_datetime])
        .collect()
        .unwrap();
}
/// A binary expression applied on top of an aggregation (`mean * 2`) must
/// evaluate per group.
#[test]
fn test_lazy_group_by_binary_expr() {
    let frame = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let doubled_mean = col("b").mean() * lit(2);
    let result = frame
        .lazy()
        .group_by([col("a")])
        .agg([doubled_mean])
        .sort(["a"], Default::default())
        .collect()
        .unwrap();

    assert_eq!(
        Vec::from(result.column("b").unwrap().f64().unwrap()),
        [Some(4.0), Some(9.0), Some(12.0)]
    );
}
/// Filters inside group-by aggregations: groups in which the filter matches
/// nothing yield 0 for `sum` and null for `first`, `mean` and `last`.
#[test]
fn test_lazy_group_by_filter() -> PolarsResult<()> {
    let frame = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }?;

    // Matches only rows of group "a"; the mean uses a key that never occurs.
    let key_is_a = col("a").eq(lit("a"));
    let key_is_e = col("a").eq(lit("e"));

    let result = frame
        .lazy()
        .group_by([col("a")])
        .agg([
            col("b").filter(key_is_a.clone()).sum().alias("b_sum"),
            col("b").filter(key_is_a.clone()).first().alias("b_first"),
            col("b").filter(key_is_e).mean().alias("b_mean"),
            col("b").filter(key_is_a).last().alias("b_last"),
        ])
        .sort(["a"], SortMultipleOptions::default())
        .collect()?;

    assert_eq!(
        Vec::from(result.column("b_sum").unwrap().i32().unwrap()),
        [Some(6), Some(0), Some(0)]
    );
    assert_eq!(
        Vec::from(result.column("b_first").unwrap().i32().unwrap()),
        [Some(1), None, None]
    );
    assert_eq!(
        Vec::from(result.column("b_mean").unwrap().f64().unwrap()),
        [None, None, None]
    );
    assert_eq!(
        Vec::from(result.column("b_last").unwrap().i32().unwrap()),
        [Some(3), None, None]
    );
    Ok(())
}
/// Left-joins a renamed projection with an identical copy of itself and
/// selects a column whose name exists on both sides.
#[test]
fn test_group_by_projection_pd_same_column() -> PolarsResult<()> {
    // Builds a fresh lazy frame with both columns renamed.
    let renamed_frame = || {
        let frame = df![
            "col1" => ["a", "ab", "abc"],
            "col2" => [1, 2, 3]
        ]
        .unwrap();

        frame
            .lazy()
            .select([col("col1").alias("foo"), col("col2").alias("bar")])
    };

    let joined = renamed_frame()
        .left_join(renamed_frame(), col("foo"), col("foo"))
        .select([col("bar")])
        .collect()?;

    let bar = joined.column("bar")?.i32()?;
    assert_eq!(Vec::from(bar), &[Some(1), Some(2), Some(3)]);
    Ok(())
}
/// Sorting the frame before the group-by and sorting inside the aggregation
/// must give the same `head(2)` per group.
#[test]
fn test_group_by_sort_slice() -> PolarsResult<()> {
    let df = df![
        "groups" => [1, 2, 2, 3, 3, 3],
        "vals" => [1, 5, 6, 3, 9, 8]
    ]?;

    // Variant 1: sort the whole frame, then aggregate.
    let sorted_before = df
        .clone()
        .lazy()
        .sort(
            ["vals"],
            SortMultipleOptions::default().with_order_descending(true),
        )
        .group_by([col("groups")])
        .agg([col("vals").head(Some(2)).alias("foo")])
        .sort(["groups"], Default::default())
        .collect()?;

    // Variant 2: sort within the aggregation expression.
    let top_two = col("vals")
        .sort(SortOptions::default().with_order_descending(true))
        .head(Some(2))
        .alias("foo");
    let sorted_within = df
        .lazy()
        .group_by([col("groups")])
        .agg([top_two])
        .sort(["groups"], Default::default())
        .collect()?;

    assert!(sorted_before.column("foo")?.equals(sorted_within.column("foo")?));
    Ok(())
}
/// A cumulative sum inside a group-by restarts at every group.
#[test]
#[cfg(feature = "cum_agg")]
fn test_group_by_cum_sum() -> PolarsResult<()> {
    let frame = df![
        "groups" => [1, 2, 2, 3, 3, 3],
        "vals" => [1, 5, 6, 3, 9, 8]
    ]?;

    let result = frame
        .lazy()
        .group_by([col("groups")])
        .agg([col("vals").cum_sum(false)])
        .sort(["groups"], Default::default())
        .collect()?;

    let expected = vec![Some(1), Some(5), Some(11), Some(3), Some(12), Some(20)];
    assert_eq!(
        Vec::from(result.column("vals")?.explode(false)?.i32()?),
        expected
    );
    Ok(())
}
/// `arg_sort_by` over two keys with mixed sort directions; the second query
/// (string key first) only has to execute.
#[test]
#[cfg(feature = "range")]
fn test_arg_sort_multiple() -> PolarsResult<()> {
    let df = df![
        "int" => [1, 2, 3, 1, 2],
        "flt" => [3.0, 2.0, 1.0, 2.0, 1.0],
        "str" => ["a", "a", "a", "b", "b"]
    ]?;

    let by_int_then_flt = arg_sort_by(
        [col("int"), col("flt")],
        SortMultipleOptions::default().with_order_descending_multi([true, false]),
    );
    let result = df.clone().lazy().select([by_int_then_flt]).collect()?;

    let expected = vec![Some(2), Some(4), Some(1), Some(3), Some(0)];
    assert_eq!(Vec::from(result.column("int")?.idx()?), expected);

    let by_str_then_flt = arg_sort_by(
        [col("str"), col("flt")],
        SortMultipleOptions::default().with_order_descending_multi([true, false]),
    );
    let _out = df.lazy().select([by_str_then_flt]).collect()?;
    Ok(())
}
/// Exploding two list columns at once restores the original row count.
#[test]
fn test_multiple_explode() -> PolarsResult<()> {
    let frame = df![
        "a" => [0, 1, 2, 0, 2],
        "b" => [5, 4, 3, 2, 1],
        "c" => [2, 3, 4, 1, 5]
    ]?;

    let grouped = frame
        .lazy()
        .group_by([col("a")])
        .agg([col("b").alias("b_list"), col("c").alias("c_list")]);
    let exploded = grouped
        .explode(by_name(["c_list", "b_list"], true))
        .collect()?;

    assert_eq!(exploded.shape(), (5, 3));
    Ok(())
}
/// A filter that references both an aliased derived column and its source
/// column must evaluate correctly.
#[test]
fn test_filter_and_alias() -> PolarsResult<()> {
    let frame = df![
        "a" => [0, 1, 2, 0, 2]
    ]?;
    let expected = df![
        "a" => [2, 2],
        "a_squared" => [4.0, 4.0]
    ]?;

    let predicate = col("a_squared").gt(lit(1)).and(col("a").gt(lit(1)));
    let result = frame
        .lazy()
        .with_column(col("a").pow(2.0).alias("a_squared"))
        .filter(predicate)
        .collect()?;

    assert!(result.equals(&expected));
    Ok(())
}
/// Filtering with the literal `true` keeps all 100 rows.
#[test]
fn test_filter_lit() {
    // 100 single-letter strings cycling through 'A'..='Z'.
    let letters = (0..100).map(|i| ('A'..='Z').nth(i % 26).unwrap().to_string());
    let column = Series::from_iter(letters).into_column();
    let frame = DataFrame::new([column].into()).unwrap();

    let result = frame.lazy().filter(lit(true)).collect().unwrap();
    assert_eq!(result.shape(), (100, 1));
}
/// A `when/then/otherwise` whose `then` branch is a null literal produces a
/// null exactly where the predicate holds.
#[test]
fn test_ternary_null() -> PolarsResult<()> {
    let frame = df![
        "a" => ["a", "b", "c"]
    ]?;

    let null_when_c = when(col("a").eq(lit("c")))
        .then(Null {}.lit())
        .otherwise(col("a"))
        .alias("foo");
    let result = frame.lazy().select([null_when_c]).collect()?;

    let null_mask: Vec<_> = result.column("foo")?.is_null().into_iter().collect();
    assert_eq!(null_mask, &[Some(false), Some(false), Some(true)]);
    Ok(())
}
/// Forward fill evaluated as a window over `a` with `WindowMapping::Join`:
/// rows of group "a" get `[1, 1]`, the row of group "b" keeps one null.
#[test]
fn test_fill_forward() -> PolarsResult<()> {
    let frame = df![
        "a" => ["a", "b", "a"],
        "b" => [Some(1), None, None]
    ]?;

    let forward_filled = col("b")
        .fill_null_with_strategy(FillNullStrategy::Forward(FillNullLimit::None))
        .over_with_options(Some([col("a")]), None, WindowMapping::Join)?;
    let result = frame.lazy().select([forward_filled]).collect()?;

    let lists = result.column("b")?.list()?;

    let row0: Series = lists.get_as_series(0).unwrap();
    assert!(row0.equals(&Series::new("b".into(), &[1, 1])));
    let row2: Series = lists.get_as_series(2).unwrap();
    assert!(row2.equals(&Series::new("b".into(), &[1, 1])));
    let row1: Series = lists.get_as_series(1).unwrap();
    assert_eq!(row1.null_count(), 1);
    Ok(())
}
/// Cross-joining a 3-row and a 2-row frame gives 6 rows and 4 columns.
#[cfg(feature = "cross_join")]
#[test]
fn test_cross_join() -> PolarsResult<()> {
    let left = df![
        "a" => ["a", "b", "a"],
        "b" => [Some(1), None, None]
    ]?;
    let right = df![
        "a" => [1, 2],
        "b" => [None, Some(12)]
    ]?;

    let product = left.lazy().cross_join(right.lazy(), None).collect()?;
    assert_eq!(product.shape(), (6, 4));
    Ok(())
}
/// When the filter removes every row, a literal in the projection must not
/// be broadcast to a non-empty column.
#[test]
fn test_select_empty_df() -> PolarsResult<()> {
    let frame = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    ]?;

    let result = frame
        .lazy()
        .filter(col("a").eq(lit(0)))
        .select([col("a"), lit(1).alias("c")])
        .collect()?;

    assert_eq!(result.column("a")?.len(), 0);
    assert_eq!(result.column("c")?.len(), 0);
    Ok(())
}
/// `name().keep()` after an alias restores the original column names, so
/// two columns aliased to the same name do not clash.
#[test]
fn test_keep_name() -> PolarsResult<()> {
    let frame = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    ]?;

    let projection = [
        col("a").alias("bar").name().keep(),
        col("b").alias("bar").name().keep(),
    ];
    let result = frame.lazy().select(projection).collect()?;

    assert_eq!(result.get_column_names(), &["a", "b"]);
    Ok(())
}
/// `all().exclude_cols(["b"])` selects every column except `b`.
#[test]
fn test_exclude() -> PolarsResult<()> {
    let frame = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3],
        "c" => [1, 2, 3]
    ]?;

    let all_but_b = all().exclude_cols(["b"]).as_expr();
    let result = frame.lazy().select([all_but_b]).collect()?;

    assert_eq!(result.get_column_names(), &["a", "c"]);
    Ok(())
}
/// A `^...$` column name is treated as a regex and selects the matching
/// columns only.
#[test]
#[cfg(feature = "regex")]
fn test_regex_selection() -> PolarsResult<()> {
    let frame = df![
        "anton" => [1, 2, 3],
        "arnold schwars" => [1, 2, 3],
        "annie" => [1, 2, 3]
    ]?;

    let pattern = col("^a.*o.*$");
    let result = frame.lazy().select([pattern]).collect()?;

    assert_eq!(result.get_column_names(), &["anton", "arnold schwars"]);
    Ok(())
}
/// `sort_by` on two keys gives the same ordering in a plain select and
/// inside a stable group-by (after exploding the per-group lists).
#[test]
fn test_sort_by() -> PolarsResult<()> {
    let df = df![
        "a" => [1, 2, 3, 4, 5],
        "b" => [1, 1, 1, 2, 2],
        "c" => [2, 3, 1, 2, 1]
    ]?;
    let expected = [Some(3), Some(1), Some(2), Some(5), Some(4)];

    // Select context.
    let selected = df
        .clone()
        .lazy()
        .select([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
        .collect()?;
    let sorted = selected.column("a")?;
    assert_eq!(Vec::from(sorted.i32().unwrap()), &expected);

    // Group-by context, first run.
    let grouped = df
        .clone()
        .lazy()
        .group_by_stable([col("b")])
        .agg([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
        .collect()?;
    let sorted = grouped.column("a")?.explode(false)?;
    assert_eq!(Vec::from(sorted.i32().unwrap()), &expected);

    // Group-by context, second run of the same query.
    let grouped = df
        .lazy()
        .group_by_stable([col("b")])
        .agg([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
        .collect()?;
    let sorted = grouped.column("a")?.explode(false)?;
    assert_eq!(Vec::from(sorted.i32().unwrap()), &expected);
    Ok(())
}
/// Shifts `B`, filters on the shifted values and evaluates the result as a
/// window over `fruits` with `WindowMapping::Join`.
#[test]
fn test_filter_after_shift_in_groups() -> PolarsResult<()> {
    let filtered_expr = col("B")
        .shift(lit(1))
        .filter(col("B").shift(lit(1)).gt(lit(4)))
        .over_with_options(Some([col("fruits")]), None, WindowMapping::Join)?
        .alias("filtered");

    let result = fruits_cars()
        .lazy()
        .select([col("fruits"), filtered_expr])
        .collect()?;

    let lists = result.column("filtered")?.list()?;

    // Rows 0 and 1 each hold a list starting with 5; row 2's list is empty.
    let row0_head = lists.get_as_series(0).unwrap().i32()?.get(0).unwrap();
    assert_eq!(row0_head, 5);
    let row1_head = lists.get_as_series(1).unwrap().i32()?.get(0).unwrap();
    assert_eq!(row1_head, 5);
    let row2_len = lists.get_as_series(2).unwrap().len();
    assert_eq!(row2_len, 0);
    Ok(())
}
/// `drop_nulls` after a ternary that nulls out the value 10 must remove
/// exactly that row.
#[test]
fn test_lazy_ternary_predicate_pushdown() -> PolarsResult<()> {
    let frame = df![
        "a" => &[10, 1, 2, 3]
    ]?;

    let null_out_ten = when(col("a").eq(lit(10)))
        .then(Null {}.lit())
        .otherwise(col("a"));
    let result = frame
        .lazy()
        .select([null_out_ten])
        .drop_nulls(None)
        .collect()?;

    assert_eq!(
        Vec::from(result.get_columns()[0].i32()?),
        &[Some(1), Some(2), Some(3)]
    );
    Ok(())
}
/// Adding two categorical columns with a string literal in between yields a
/// concatenated string column.
#[test]
#[cfg(feature = "dtype-categorical")]
fn test_categorical_addition() -> PolarsResult<()> {
    let as_categoricals = [
        col("fruits").cast(DataType::from_categories(Categories::global())),
        col("cars").cast(DataType::from_categories(Categories::global())),
    ];
    let concatenated = (col("fruits") + lit(" ") + col("cars")).alias("foo");

    let result = fruits_cars()
        .lazy()
        .select(as_categoricals)
        .select([concatenated])
        .collect()?;

    assert_eq!(result.column("foo")?.str()?.get(0).unwrap(), "banana beetle");
    Ok(())
}
/// Selecting the wildcard twice creates duplicate output names and must
/// return an error.
#[test]
fn test_error_duplicate_names() {
    let duplicated = fruits_cars().lazy().select([col("*"), col("*")]);
    assert!(duplicated.collect().is_err());
}
/// Counting a filtered expression in select context gives the number of
/// matching rows.
#[test]
fn test_filter_count() -> PolarsResult<()> {
    let banana_count = col("fruits")
        .filter(col("fruits").eq(lit("banana")))
        .count();
    let result = fruits_cars().lazy().select([banana_count]).collect()?;

    assert_eq!(result.column("fruits")?.idx()?.get(0), Some(3));
    Ok(())
}
/// Groups by an `i16` and an `i32` key together and sums the `i16` column;
/// the sums are read back as `i64`.
#[test]
#[cfg(feature = "dtype-i16")]
fn test_group_by_small_ints() -> PolarsResult<()> {
    let frame = df![
        "id_32" => [1i32, 2],
        "id_16" => [1i16, 2]
    ]?;

    let descending = SortMultipleOptions::default().with_order_descending(true);
    let result = frame
        .lazy()
        .group_by([col("id_16"), col("id_32")])
        .agg([col("id_16").sum().alias("foo")])
        .sort(["foo"], descending)
        .collect()?;

    assert_eq!(Vec::from(result.column("foo")?.i64()?), &[Some(2), Some(1)]);
    Ok(())
}
/// The schema of a ternary with a null `then` branch must not resolve to
/// the `Null` dtype.
#[test]
fn test_when_then_schema() -> PolarsResult<()> {
    let ternary = when(col("A").gt(lit(1)))
        .then(Null {}.lit())
        .otherwise(col("A"));

    let schema = fruits_cars().lazy().select([ternary]).collect_schema();

    assert_ne!(schema?.get_at_index(0).unwrap().1, &DataType::Null);
    Ok(())
}
/// A literal selected alongside a full column is broadcast to more than one
/// row.
#[test]
fn test_singleton_broadcast() -> PolarsResult<()> {
    let projection = [col("fruits"), lit(1).alias("foo")];
    let result = fruits_cars().lazy().select(projection).collect()?;

    assert!(result.column("foo")?.len() > 1);
    Ok(())
}
/// `implode` in select context must equal a single-element list column
/// built by hand from the same values.
#[test]
fn test_list_in_select_context() -> PolarsResult<()> {
    let values = Column::new("a".into(), &[1, 2, 3]);

    // Expected result: one list element containing all of `values`.
    let mut list_builder =
        get_list_builder(values.dtype(), values.len(), 1, values.name().clone());
    list_builder
        .append_series(values.as_materialized_series())
        .unwrap();
    let expected = list_builder.finish().into_column();

    let frame = DataFrame::new(vec![values])?;
    let result = frame.lazy().select([col("a").implode()]).collect()?;

    let imploded = result.column("a")?;
    assert!(imploded.equals(&expected));
    Ok(())
}
/// Rounding applied after an aggregation: once on a `Float32` mean (dtype
/// must stay `f32`) and once on a weighted-average expression.
#[test]
#[cfg(feature = "round_series")]
fn test_round_after_agg() -> PolarsResult<()> {
    let rounded_mean = col("A")
        .cast(DataType::Float32)
        .mean()
        .round(2, polars_ops::series::RoundMode::default())
        .alias("foo");
    let f32_result = fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([rounded_mean])
        .collect()?;
    assert!(f32_result.column("foo")?.f32().is_ok());

    let frame = df![
        "groups" => ["pigeon",
                     "rabbit",
                     "rabbit",
                     "Chris",
                     "pigeon",
                     "fast",
                     "fast",
                     "pigeon",
                     "rabbit",
                     "Chris"],
        "b" => [5409, 4848, 4864, 3540, 8103, 3083, 8575, 9963, 8809, 5425],
        "c" => [0.4517241160719615,
                0.2551467646274673,
                0.8682045191407308,
                0.9925316385786037,
                0.5392027792928116,
                0.7633847828107002,
                0.7967295231651537,
                0.01444779067224733,
                0.23807484087472652,
                0.10985868798350984]
    ]?;

    // sum(b * c) / sum(b), rounded to two decimals.
    let rounded_weighted_avg = ((col("b") * col("c")).sum() / col("b").sum())
        .round(2, polars_ops::series::RoundMode::default())
        .alias("foo");
    let weighted = frame
        .lazy()
        .group_by_stable([col("groups")])
        .agg([rounded_weighted_avg])
        .collect()?;

    let foo = weighted.column("foo")?;
    let foo = foo.f64()?;
    assert_eq!(
        Vec::from(foo),
        &[Some(0.3), Some(0.41), Some(0.46), Some(0.79)]
    );
    Ok(())
}
/// Frame-level `fill_nan` with a null literal turns the NaN in the float
/// column into a null; the frame also carries a date column.
#[test]
#[cfg(feature = "dtype-date")]
fn test_fill_nan() -> PolarsResult<()> {
    let dates = Column::new("date".into(), &[1, 2, 3]).cast(&DataType::Date)?;
    let floats = Column::new("float".into(), &[Some(1.0), Some(f32::NAN), Some(3.0)]);
    let frame = DataFrame::new(vec![dates, floats])?;

    let result = frame.lazy().fill_nan(Null {}.lit()).collect()?;

    let filled = result.column("float")?;
    assert_eq!(Vec::from(filled.f32()?), &[Some(1.0), None, Some(3.0)]);
    Ok(())
}
/// Subtracting a regex selector from `all()` drops the matching columns.
#[test]
#[cfg(feature = "regex")]
fn test_exclude_regex() -> PolarsResult<()> {
    let without_fruits_and_cars =
        (all() - Selector::Matches("^(fruits|cars)$".into())).as_expr();

    let result = fruits_cars()
        .lazy()
        .select([without_fruits_and_cars])
        .collect()?;

    assert_eq!(result.get_column_names(), &["A", "B"]);
    Ok(())
}
/// Dense rank inside a stable group-by; the second group's ranks must be
/// `[1]`.
#[test]
#[cfg(feature = "rank")]
fn test_group_by_rank() -> PolarsResult<()> {
    let dense = RankOptions {
        method: RankMethod::Dense,
        ..Default::default()
    };

    let result = fruits_cars()
        .lazy()
        .group_by_stable([col("cars")])
        .agg([col("B").rank(dense, None)])
        .collect()?;

    let rank_lists = result.column("B")?;
    let second_group = rank_lists.list()?.get_as_series(1).unwrap();
    let second_group_ranks = second_group.idx()?;
    assert_eq!(Vec::from(second_group_ranks), &[Some(1)]);
    Ok(())
}
/// Selecting by dtype returns the matching columns in frame order
/// (`strings` before `floats`), not in the order the dtypes were listed.
#[test]
pub fn test_select_by_dtypes() -> PolarsResult<()> {
    let frame = df![
        "bools" => [true, false, true],
        "ints" => [1, 2, 3],
        "strings" => ["a", "b", "c"],
        "floats" => [1.0, 2.0, 3.0f32]
    ]?;

    let float_or_string = dtype_cols([DataType::Float32, DataType::String])
        .as_selector()
        .as_expr();
    let result = frame.lazy().select([float_or_string]).collect()?;

    assert_eq!(result.dtypes(), &[DataType::String, DataType::Float32]);
    Ok(())
}
/// Binary expressions: a `neq` that only has to run, and a ternary
/// multiplied by an aggregated sum whose result dtype must be `Float64`.
#[test]
fn test_binary_expr() -> PolarsResult<()> {
    let _ = fruits_cars()
        .lazy()
        .select([col("A").neq(lit(1))])
        .collect()?;

    let frame = df!(
        "nrs"=> [Some(1i64), Some(2), Some(3), None, Some(5)],
        "random"=> [0.1f64, 0.6, 0.2, 0.6, 0.3]
    )?;

    let other = when(col("random").gt(lit(0.5)))
        .then(lit(2))
        .otherwise(col("random"))
        .alias("other");
    let product = other * col("nrs").sum();
    let result = frame.lazy().select([product]).collect()?;

    assert_eq!(result.dtypes(), &[DataType::Float64]);
    Ok(())
}
/// `arg_sort` as a window over single-row groups returns index 0 for every
/// row.
#[test]
fn test_single_group_result() -> PolarsResult<()> {
    let frame = df![
        "a" => [1, 2],
        "b" => [1, 1]
    ]?;

    let windowed = col("a").arg_sort(false, false).over([col("a")]);
    let result = frame.lazy().select([windowed]).collect()?;

    let indices = result.column("a")?.idx()?;
    assert_eq!(Vec::from(indices), &[Some(0), Some(0)]);
    Ok(())
}
/// Average rank as a window with `WindowMapping::Join`; the exploded lists
/// hold each row's whole-group ranks.
#[test]
#[cfg(feature = "rank")]
fn test_single_ranked_group() -> PolarsResult<()> {
    let frame = df!["group" => [1, 2, 2],
        "value"=> [100, 50, 10]
    ]?;

    let average = RankOptions {
        method: RankMethod::Average,
        ..Default::default()
    };
    let ranked = col("value")
        .rank(average, None)
        .over_with_options(Some([col("group")]), None, WindowMapping::Join)?;

    let result = frame.lazy().with_columns([ranked]).collect()?;

    let ranks = result.column("value")?.explode(false)?;
    let ranks = ranks.f64()?;
    assert_eq!(
        Vec::from(ranks),
        &[Some(1.0), Some(2.0), Some(1.0), Some(2.0), Some(1.0)]
    );
    Ok(())
}
#[test]
#[cfg(feature = "diff")]
/// Evaluates a set of shift / fill / cumulative / diff expressions on a frame
/// filtered with an all-`false` mask; the test only requires that collecting succeeds.
fn empty_df() -> PolarsResult<()> {
    let source = fruits_cars();
    let all_false = BooleanChunked::full("".into(), false, source.height());
    let filtered = source.filter(&all_false)?;

    let exprs = [
        col("A").shift(lit(1)).alias("1"),
        col("A").shift_and_fill(lit(1), lit(1)).alias("2"),
        col("A").shift_and_fill(lit(-1), lit(1)).alias("3"),
        col("A").fill_null(lit(1)).alias("4"),
        col("A").cum_count(false).alias("5"),
        col("A").diff(lit(1), NullBehavior::Ignore).alias("6"),
        col("A").cum_max(false).alias("7"),
        col("A").cum_min(false).alias("8"),
    ];
    filtered.lazy().select(exprs).collect()?;
    Ok(())
}
#[test]
#[cfg(feature = "abs")]
/// Sums the absolute values of "A" per group of "B" and checks both group totals.
fn test_apply_flatten() -> PolarsResult<()> {
    let frame = df![
        "A"=> [1.1435, 2.223456, 3.44732, -1.5234, -2.1238, -3.2923],
        "B"=> ["a", "b", "a", "b", "a", "b"]
    ]?;

    let abs_sum = col("A").abs().sum().alias("A_sum");
    let grouped = frame
        .lazy()
        .group_by_stable([col("B")])
        .agg([abs_sum])
        .collect()?;

    // Stable grouping keeps first-seen order: "a" first, then "b".
    let sums = grouped.column("A_sum")?;
    assert_eq!(sums.get(0)?, AnyValue::Float64(6.71462));
    assert_eq!(sums.get(1)?, AnyValue::Float64(7.039156));
    Ok(())
}
#[test]
#[cfg(feature = "is_in")]
/// Checks `is_in` inside a group-by aggregation, once against an imploded
/// filtered column and once against an imploded literal series.
fn test_is_in() -> PolarsResult<()> {
    let frame = fruits_cars();
    // Both variants are expected to produce the same flags.
    let expected = [Some(true), Some(false), Some(true), Some(true), Some(true)];

    // Haystack built from the "cars" column itself, restricted to "beetle".
    let beetles = col("cars").filter(col("cars").eq(lit("beetle"))).implode();
    let by_column = frame
        .clone()
        .lazy()
        .group_by_stable([col("fruits")])
        .agg([col("cars").is_in(beetles, false)])
        .collect()?;
    let exploded = by_column.column("cars").unwrap().explode(false)?;
    let flags = exploded.bool().unwrap();
    assert_eq!(Vec::from(flags), &expected);

    // Haystack supplied as a literal series.
    let literal = lit(Series::new("a".into(), ["beetle", "vw"])).implode();
    let by_literal = frame
        .lazy()
        .group_by_stable([col("fruits")])
        .agg([col("cars").is_in(literal, false)])
        .collect()?;
    let exploded = by_literal.column("cars").unwrap().explode(false)?;
    let flags = exploded.bool().unwrap();
    assert_eq!(Vec::from(flags), &expected);
    Ok(())
}
#[test]
/// Groups by "keys" and counts, per group, how many "vals" equal "a" and "b"
/// by summing the boolean comparison results.
fn test_partitioned_gb_1() -> PolarsResult<()> {
    let frame = df![
        "keys" => [1, 1, 1, 1, 2],
        "vals" => ["a", "b", "c", "a", "a"]
    ]?;

    let count_a = (col("vals").eq(lit("a"))).sum().alias("eq_a");
    let count_b = (col("vals").eq(lit("b"))).sum().alias("eq_b");

    // Sort on the key so the row order of the result is deterministic.
    let result = frame
        .lazy()
        .group_by([col("keys")])
        .agg([count_a, count_b])
        .sort(["keys"], Default::default())
        .collect()?;

    let expected = df![
        "keys" => [1, 2],
        "eq_a" => [2 as IdxSize, 1],
        "eq_b" => [1 as IdxSize, 0],
    ]?;
    assert!(result.equals(&expected));
    Ok(())
}
#[test]
/// Groups 100 identical keys and checks that both `len()` and `count()`
/// report 100 for the single resulting group.
fn test_partitioned_gb_count() -> PolarsResult<()> {
    let frame = df![
        "col" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    let row_count = len().alias("counted");
    let value_count = col("col").count().alias("count2");

    let result = frame
        .lazy()
        .group_by([col("col")])
        .agg([row_count, value_count])
        .collect()?;

    let expected = df![
        "col" => [0],
        "counted" => [100 as IdxSize],
        "count2" => [100 as IdxSize],
    ]?;
    assert!(result.equals(&expected));
    Ok(())
}
#[test]
/// Takes the grouped mean of a string literal column and an integer literal
/// column: the string mean must be null, the integer mean `1.0`.
fn test_partitioned_gb_mean() -> PolarsResult<()> {
    let frame = df![
        "key" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    let literals = [lit("a").alias("str"), lit(1).alias("int")];
    let means = [
        col("str").mean().alias("mean_str"),
        col("int").mean().alias("mean_int"),
    ];

    let result = frame
        .lazy()
        .with_columns(literals)
        .group_by([col("key")])
        .agg(means)
        .collect()?;

    // One group, with the key column plus the two aggregates.
    assert_eq!(result.shape(), (1, 3));
    let mean_of_str = result.column("mean_str")?;
    assert_eq!(mean_of_str.get(0)?, AnyValue::Null);
    let mean_of_int = result.column("mean_int")?;
    assert_eq!(mean_of_int.get(0)?, AnyValue::Float64(1.0));
    Ok(())
}
#[test]
/// Sums a binary expression (`column + literal`) inside a group-by, first on
/// the integer column and then after casting it to `Float32`.
fn test_partitioned_gb_binary() -> PolarsResult<()> {
    let frame = df![
        "col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    // Integer variant: 20 rows of (0 + 10).
    let int_sum = (col("col") + lit(10)).sum().alias("sum");
    let int_result = frame
        .clone()
        .lazy()
        .group_by([col("col")])
        .agg([int_sum])
        .collect()?;
    let int_expected = df![
        "col" => [0],
        "sum" => [200],
    ]?;
    assert!(int_result.equals(&int_expected));

    // Float variant: same computation after a cast to Float32.
    let float_sum = (col("col").cast(DataType::Float32) + lit(10.0))
        .sum()
        .alias("sum");
    let float_result = frame
        .lazy()
        .group_by([col("col")])
        .agg([float_sum])
        .collect()?;
    let float_expected = df![
        "col" => [0],
        "sum" => [200.0_f32],
    ]?;
    assert!(float_result.equals(&float_expected));
    Ok(())
}
#[test]
/// Sums a `when/then/otherwise` expression inside a group-by; the sum counts
/// how many "val" entries are greater than 10.
fn test_partitioned_gb_ternary() -> PolarsResult<()> {
    let frame = df![
        "col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
        "val" => (0..20).map(Some).collect::<Int32Chunked>().into_series(),
    ]?;

    let above_ten = when(col("val").gt(lit(10)))
        .then(lit(1))
        .otherwise(lit(0))
        .sum()
        .alias("sum");

    let result = frame
        .lazy()
        .group_by([col("col")])
        .agg([above_ten])
        .collect()?;

    // Values 11..=19 satisfy the predicate, hence 9.
    let expected = df![
        "col" => [0],
        "sum" => [9],
    ]?;
    assert!(result.equals(&expected));
    Ok(())
}
#[test]
/// Sorts on a column of equal keys with `maintain_order` enabled, slices the
/// first three rows and checks they keep their original relative order.
fn test_sort_maintain_order_true() -> PolarsResult<()> {
    let frame = df![
        "A" => [1, 1, 1, 1],
        "B" => ["A", "B", "C", "D"],
    ]?;

    let sort_options = SortMultipleOptions::default()
        .with_maintain_order(true)
        .with_nulls_last(true);

    let sorted = frame
        .lazy()
        .sort_by_exprs([col("A")], sort_options)
        .slice(0, 3)
        .collect()?;

    let expected = df![
        "A" => [1, 1, 1],
        "B" => ["A", "B", "C"],
    ]?;
    assert!(sorted.equals(&expected));
    Ok(())
}
#[test]
/// Applies a `WindowMapping::Join` window to an empty frame and checks that
/// the output schema is a single `List(Int32)` column named "b".
fn test_over_with_options_empty_join() -> PolarsResult<()> {
    let frame = DataFrame::new(vec![
        Series::new_empty("a".into(), &DataType::Int32).into(),
        Series::new_empty("b".into(), &DataType::Int32).into(),
    ])?;

    let windowed =
        col("b").over_with_options(Some([col("a")]), Option::None, WindowMapping::Join)?;
    let result = frame.lazy().select([windowed]).collect()?;

    let list_field: Field = Field::new("b".into(), DataType::List(Box::new(DataType::Int32)));
    let expected_schema: Schema = Schema::from_iter(vec![list_field]);
    assert_eq!(&**result.schema(), &expected_schema);
    Ok(())
}
#[test]
#[cfg(feature = "serde")]
/// Registers a named-UDF registry, builds an anonymous function expression
/// that refers to the UDF by name and payload, and checks the evaluated result.
fn test_named_udfs() -> PolarsResult<()> {
    use polars_plan::dsl::named_serde::{ExprRegistry, set_named_serde_registry};

    let input = DataFrame::new(vec![Column::new("a".into(), vec![1, 2, 3, 4])])?.lazy();

    /// Registry that checks the requested name and payload and hands back a
    /// UDF which multiplies its first input column by two.
    struct DoublingRegistry;

    impl ExprRegistry for DoublingRegistry {
        fn get_function(&self, name: &str, payload: &[u8]) -> Option<Arc<dyn AnonymousColumnsUdf>> {
            assert_eq!(name, "test-function");
            assert_eq!(payload, b"check");
            let udf = BaseColumnUdf::new(
                // Move the first column out of the slice and double it.
                |columns: &mut [Column]| Ok(std::mem::take(&mut columns[0]) * 2),
                // The output field is the first input field, unchanged.
                |_: &Schema, fields: &[Field]| Ok(fields[0].clone()),
            );
            Some(Arc::new(udf))
        }
    }

    set_named_serde_registry(Arc::new(DoublingRegistry) as _);

    let named_call = Expr::AnonymousFunction {
        input: vec![Expr::Column("a".into())],
        function: LazySerde::Named {
            name: "test-function".into(),
            payload: Some(bytes::Bytes::from("check")),
            value: None,
        },
        options: FunctionOptions::default(),
        fmt_str: Box::new("test".into()),
    };

    assert_eq!(
        input.select(&[named_call]).collect()?,
        DataFrame::new(vec![Column::new("a".into(), vec![2, 4, 6, 8])])?,
    );
    Ok(())
}