Path: blob/main/crates/polars-lazy/src/tests/predicate_queries.rs
6939 views
use super::*;12#[test]3#[cfg(feature = "parquet")]4fn test_multiple_roots() -> PolarsResult<()> {5let mut expr_arena = Arena::with_capacity(16);6let mut lp_arena = Arena::with_capacity(8);78let lf = scan_foods_parquet(false).select([col("calories").alias("bar")]);910// this produces a predicate with two root columns, this test if we can11// deal with multiple roots12let lf = lf.filter(col("bar").gt(lit(45i32)));13let lf = lf.filter(col("bar").lt(lit(110i32)));1415// also check if all predicates are combined and pushed down16let root = lf.clone().optimize(&mut lp_arena, &mut expr_arena)?;17assert!(predicate_at_scan(lf));18// and that we don't have any filter node19assert!(20!lp_arena21.iter(root)22.any(|(_, lp)| matches!(lp, IR::Filter { .. }))23);2425Ok(())26}2728#[test]29#[cfg(all(feature = "is_in", feature = "strings", feature = "dtype-categorical"))]30fn test_issue_2472() -> PolarsResult<()> {31let df = df![32"group" => ["54360-2001-0-20020312-4-1"33,"39444-2020-0-20210418-4-1"34,"68398-2020-0-20201216-4-1"35,"30910-2020-0-20210223-4-1"36,"71060-2020-0-20210315-4-1"37,"47959-2020-0-20210305-4-1"38,"63212-2018-0-20181007-2-2"39,"61465-2018-0-20181018-2-2"40]41]?;42let base = df43.lazy()44.with_column(col("group").cast(DataType::from_categories(Categories::global())));4546let extract = col("group")47.cast(DataType::String)48.str()49.extract(lit(r"(\d+-){4}(\w+)-"), 2)50.cast(DataType::Int32)51.alias("age");52let predicate = col("age").is_in(lit(Series::new("".into(), [2i32])), false);5354let out = base55.clone()56.with_column(extract.clone())57.filter(predicate.clone())58.collect()?;5960assert_eq!(out.shape(), (2, 2));6162let out = base.select([extract]).filter(predicate).collect()?;63assert_eq!(out.shape(), (2, 1));6465Ok(())66}6768#[test]69fn test_pass_unrelated_apply() -> PolarsResult<()> {70// maps should not influence a predicate of a different column as maps should not depend on previous values71let df = fruits_cars();7273let q = df74.lazy()75.with_column(col("A").map(76|s| Ok(s.is_null().into_column()),77|_, f| Ok(Field::new(f.name().clone(), DataType::Boolean)),78))79.filter(col("B").gt(lit(10i32)));8081assert!(predicate_at_scan(q));8283Ok(())84}8586#[test]87fn filter_added_column_issue_2470() -> PolarsResult<()> {88let df = fruits_cars();8990// the binary expression in the predicate lead to an incorrect pushdown because the rhs91// was not checked on the schema.92let out = df93.lazy()94.select([col("A"), lit(NULL).alias("foo")])95.filter(col("A").gt(lit(2i32)).and(col("foo").is_null()))96.collect()?;97assert_eq!(out.shape(), (3, 2));9899Ok(())100}101102#[test]103fn filter_blocked_by_map() -> PolarsResult<()> {104let df = fruits_cars();105106let allowed = OptFlags::default() & !OptFlags::PREDICATE_PUSHDOWN;107let q = df108.lazy()109.map(Ok, allowed, None, None)110.filter(col("A").gt(lit(2i32)));111112assert!(!predicate_at_scan(q.clone()));113let out = q.collect()?;114assert_eq!(out.shape(), (3, 4));115116Ok(())117}118119#[test]120#[cfg(all(feature = "temporal", feature = "strings"))]121fn test_strptime_block_predicate() -> PolarsResult<()> {122let df = df![123"date" => ["2021-01-01", "2021-01-02"]124]?;125126let q = df127.lazy()128.with_column(col("date").str().to_date(StrptimeOptions {129..Default::default()130}))131.filter(132col("date").gt(NaiveDate::from_ymd_opt(2021, 1, 1)133.unwrap()134.and_hms_opt(0, 0, 0)135.unwrap()136.lit()),137);138139assert!(!predicate_at_scan(q.clone()));140let df = q.collect()?;141assert_eq!(df.shape(), (1, 1));142143Ok(())144}145146#[test]147fn test_strict_cast_predicate_pushdown() -> PolarsResult<()> {148let df = df![149"a" => ["a", "b", "c"]150]?;151152let lf = df153.lazy()154.with_column(col("a").cast(DataType::Int32))155.filter(col("a").is_null());156157assert!(!predicate_at_scan(lf.clone()));158let out = lf.collect()?;159assert_eq!(out.shape(), (3, 1));160Ok(())161}162163#[test]164fn test_filter_nulls_created_by_join() -> PolarsResult<()> {165// #2602166let a = df![167"key" => ["foo", "bar"],168"bar" => [1, 2]169]?;170171let b = df![172"key"=> ["bar"]173]?174.lazy()175.with_column(lit(true).alias("flag"));176177let out = a178.clone()179.lazy()180.join(b.clone(), [col("key")], [col("key")], JoinType::Left.into())181.filter(col("flag").is_null())182.collect()?;183let expected = df![184"key" => ["foo"],185"bar" => [1],186"flag" => &[None, Some(true)][0..1]187]?;188assert!(out.equals_missing(&expected));189190let out = a191.lazy()192.join(b, [col("key")], [col("key")], JoinType::Left.into())193.filter(col("flag").is_null())194.with_predicate_pushdown(false)195.collect()?;196assert!(out.equals_missing(&expected));197198Ok(())199}200201#[test]202fn test_filter_null_creation_by_cast() -> PolarsResult<()> {203let df = df![204"int" => [1, 2, 3],205"empty" => ["", "", ""]206]?;207208let out = df209.lazy()210.with_column(col("empty").cast(DataType::Int32).alias("empty"))211.filter(col("empty").is_null().and(col("int").eq(lit(3i32))))212.collect()?;213214let expected = df![215"int" => [3],216"empty" => &[None, Some(1i32)][..1]217]?;218assert!(out.equals_missing(&expected));219220Ok(())221}222223#[test]224#[cfg(feature = "cse")]225fn test_predicate_on_join_suffix_4788() -> PolarsResult<()> {226let lf = df![227"x" => [1, 2],228"y" => [1, 1],229]?230.lazy();231232let q = (lf.clone().join_builder().with(lf))233.left_on([col("y")])234.right_on([col("y")])235.suffix("_")236.finish()237.filter(col("x").eq(1))238.with_comm_subplan_elim(false);239240// the left hand side should have a predicate241assert!(predicate_at_scan(q.clone()));242243let expected = df![244"x" => [1, 1],245"y" => [1, 1],246"x_" => [1, 2],247]?;248assert_eq!(q.collect()?, expected);249250Ok(())251}252253#[test]254fn test_push_join_col_predicates_to_both_sides_7247() -> PolarsResult<()> {255let df1 = df! {256"a" => ["a1", "a2"],257"b" => ["b1", "b2"],258}?;259let df2 = df! {260"a" => ["a1", "a1", "a2"],261"b2" => ["b1", "b1", "b2"],262"c" => ["a1", "c", "a2"]263}?;264let df = df1.lazy().join(265df2.lazy(),266[col("a"), col("b")],267[col("a"), col("b2")],268JoinArgs::new(JoinType::Inner),269);270let q = df271.filter(col("a").eq(lit("a1")))272.filter(col("a").eq(col("c")));273274predicate_at_all_scans(q.clone());275276let out = q.collect()?;277let expected = df![278"a" => ["a1"],279"b" => ["b1"],280"c" => ["a1"],281]?;282assert_eq!(out, expected);283Ok(())284}285286#[test]287#[cfg(feature = "semi_anti_join")]288fn test_push_join_col_predicates_to_both_sides_semi_12565() -> PolarsResult<()> {289let df1 = df! {290"a" => ["a1", "a2"],291"b" => ["b1", "b2"],292}?;293let df2 = df! {294"a" => ["a1", "a1", "a2"],295"b2" => ["b1", "b1", "b2"],296"c" => ["a1", "c", "a2"]297}?;298let df = df1.lazy().join(299df2.lazy(),300[col("a"), col("b")],301[col("a"), col("b2")],302JoinArgs::new(JoinType::Semi),303);304let q = df.filter(col("a").eq(lit("a1")));305306predicate_at_all_scans(q.clone());307308let out = q.collect()?;309let expected = df![310"a" => ["a1"],311"b" => ["b1"],312]?;313assert_eq!(out, expected);314Ok(())315}316317318