Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/logical.rs
8458 views
1
use polars_core::utils::arrow::temporal_conversions::{MICROSECONDS_IN_DAY, MILLISECONDS_IN_DAY};
2
3
use super::*;
4
5
/// Verifies that subtracting a group's first value produces `Duration` columns.
///
/// A string column is parsed to dates, then cast to a millisecond datetime. Within a
/// single group, the first value is subtracted from every value for both columns and
/// the aggregated lists are exploded back to rows. The test expects the date
/// difference in microseconds and the datetime difference in milliseconds, and
/// checks both the dtype (including its time unit) and the three values of each column.
#[test]
#[cfg(all(feature = "strings", feature = "temporal", feature = "dtype-duration"))]
fn test_duration() -> PolarsResult<()> {
    let df = df![
        "date" => ["2021-01-01", "2021-01-02", "2021-01-03"],
        "groups" => [1, 1, 1]
    ]?;

    let out = df
        .lazy()
        .with_columns(&[col("date").str().to_date(StrptimeOptions {
            ..Default::default()
        })])
        .with_column(
            col("date")
                .cast(DataType::Datetime(TimeUnit::Milliseconds, None))
                .alias("datetime"),
        )
        .group_by([col("groups")])
        .agg([
            (col("date") - col("date").first()).alias("date"),
            (col("datetime") - col("datetime").first()).alias("datetime"),
        ])
        .explode(
            by_name(["date", "datetime"], true, false),
            ExplodeOptions {
                empty_as_null: true,
                keep_nulls: true,
            },
        )
        .collect()?;

    // (column name, units per day, expected duration time unit)
    let expectations = [
        ("date", MICROSECONDS_IN_DAY, TimeUnit::Microseconds),
        ("datetime", MILLISECONDS_IN_DAY, TimeUnit::Milliseconds),
    ];

    for (name, scale, tu) in expectations {
        let column = out.column(name)?;
        // Compare by value: an identifier inside a `matches!` pattern would bind a
        // fresh variable and accept any time unit instead of checking `tu`.
        assert_eq!(
            column.dtype(),
            &DataType::Duration(tu),
            "unexpected dtype for column {name:?}"
        );
        assert_eq!(column.get(0)?, AnyValue::Duration(0, tu));
        assert_eq!(column.get(1)?, AnyValue::Duration(scale, tu));
        assert_eq!(column.get(2)?, AnyValue::Duration(2 * scale, tu));
    }

    Ok(())
}
52
53
fn print_plans(lf: &LazyFrame) {
54
println!("LOGICAL PLAN\n\n{}\n", lf.describe_plan().unwrap());
55
println!(
56
"OPTIMIZED LOGICAL PLAN\n\n{}\n",
57
lf.describe_optimized_plan().unwrap()
58
);
59
}
60
61
/// Multiplies `sepal_width` by 100, sorts by the result, and checks the collected
/// frame's height and its first value.
#[test]
fn test_lazy_arithmetic() {
    let scaled = (col("sepal_width") * lit(100)).alias("super_wide");
    let lf = get_df()
        .lazy()
        .select(&[scaled])
        .sort(["super_wide"], SortMultipleOptions::default());

    print_plans(&lf);

    let result = lf.collect().unwrap();
    println!("{result:?}");
    assert_eq!(result.height(), 7);

    let first = result.column("super_wide").unwrap().f64().unwrap().get(0);
    assert_eq!(first, Some(300.0));
}
79
80
/// Builds a plan that filters on `sepal_width` and then selects an aliased column,
/// prints both plans, and checks that collecting it succeeds.
#[test]
fn test_lazy_logical_plan_filter_and_alias_combined() {
    let predicate = col("sepal_width").lt(lit(3.5));
    let projection = [col("variety").alias("foo")];

    let lf = get_df().lazy().filter(predicate).select(&projection);
    print_plans(&lf);

    let collected = lf.collect().unwrap();
    println!("{collected:?}");
}
92
93
/// Checks that the schema computed from a logical plan contains the expected column
/// names after an aliased select and after a group-by aggregation.
#[test]
fn test_lazy_logical_plan_schema() {
    let df = get_df();

    // Aliased projection: the alias must appear in the schema.
    let projected = df.clone().lazy().select(&[col("variety").alias("foo")]);
    let schema = projected.logical_plan.compute_schema().unwrap();
    assert!(schema.get("foo").is_some());

    // Aggregation: the aggregated column must appear in the schema.
    let aggregated = df
        .lazy()
        .group_by([col("variety")])
        .agg([col("sepal_width").min()]);
    let schema = aggregated.logical_plan.compute_schema().unwrap();
    assert!(schema.get("sepal_width").is_some());
}
111
112
/// Checks that a left join on `days` can be planned and collected with no
/// selection, with a plain selection, and with a selection of `rain_right`.
#[test]
fn test_lazy_logical_plan_join() {
    let left = df!(
        "days" => &[0, 1, 2, 3, 4],
        "temp" => [22.1, 19.9, 7., 2., 3.],
        "rain" => &[0.1, 0.2, 0.3, 0.4, 0.5]
    )
    .unwrap();

    let right = df!(
        "days" => &[1, 2],
        "rain" => &[0.1, 0.2]
    )
    .unwrap();

    // Every scenario starts from the same left join on `days`.
    let joined = || {
        left.clone()
            .lazy()
            .left_join(right.clone().lazy(), col("days"), col("days"))
    };

    // Optimization must succeed when nothing is selected after the join.
    {
        let lf = joined();
        print_plans(&lf);
        // Collecting implicitly checks logical plan == optimized logical plan.
        let _df = lf.collect().unwrap();
    }

    // Optimization must succeed with a selection on top of the join.
    {
        let lf = joined().select(&[col("temp")]);
        let _df = lf.collect().unwrap();
    }

    // Optimization must succeed when selecting a column that the join renamed.
    {
        let lf = joined().select(&[col("temp"), col("rain_right")]);
        print_plans(&lf);
        let _df = lf.collect().unwrap();
    }
}
160
161