Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/logical.rs
6939 views
1
use polars_core::utils::arrow::temporal_conversions::{MICROSECONDS_IN_DAY, MILLISECONDS_IN_DAY};
2
3
use super::*;
4
5
/// Subtracting a group's first value from a `Date` column and from a
/// millisecond `Datetime` column must yield `Duration` columns holding
/// whole-day offsets in the expected time unit.
#[test]
#[cfg(all(feature = "strings", feature = "temporal", feature = "dtype-duration"))]
fn test_duration() -> PolarsResult<()> {
    let df = df![
        "date" => ["2021-01-01", "2021-01-02", "2021-01-03"],
        "groups" => [1, 1, 1]
    ]?;

    let out = df
        .lazy()
        .with_columns(&[col("date").str().to_date(StrptimeOptions::default())])
        .with_column(
            col("date")
                .cast(DataType::Datetime(TimeUnit::Milliseconds, None))
                .alias("datetime"),
        )
        .group_by([col("groups")])
        .agg([
            (col("date") - col("date").first()).alias("date"),
            (col("datetime") - col("datetime").first()).alias("datetime"),
        ])
        .explode(by_name(["date", "datetime"], true))
        .collect()?;

    let column = out.column("date")?;
    let (scale, tu) = (MICROSECONDS_IN_DAY, TimeUnit::Microseconds);
    // Compare the dtype by value: an identifier inside a `matches!` pattern is
    // a fresh binding, so `DataType::Duration(_tu)` would accept any time unit.
    assert_eq!(column.dtype(), &DataType::Duration(tu));
    assert_eq!(column.get(0)?, AnyValue::Duration(0, tu));
    assert_eq!(column.get(1)?, AnyValue::Duration(scale, tu));
    assert_eq!(column.get(2)?, AnyValue::Duration(2 * scale, tu));

    let column = out.column("datetime")?;
    let (scale, tu) = (MILLISECONDS_IN_DAY, TimeUnit::Milliseconds);
    // Same by-value dtype check for the millisecond datetime difference.
    assert_eq!(column.dtype(), &DataType::Duration(tu));
    assert_eq!(column.get(0)?, AnyValue::Duration(0, tu));
    assert_eq!(column.get(1)?, AnyValue::Duration(scale, tu));
    assert_eq!(column.get(2)?, AnyValue::Duration(2 * scale, tu));
    Ok(())
}
46
47
fn print_plans(lf: &LazyFrame) {
48
println!("LOGICAL PLAN\n\n{}\n", lf.describe_plan().unwrap());
49
println!(
50
"OPTIMIZED LOGICAL PLAN\n\n{}\n",
51
lf.describe_optimized_plan().unwrap()
52
);
53
}
54
55
/// Scales `sepal_width` by the literal `100`, sorts by the result and checks
/// the row count and the first value of the resulting `super_wide` column.
#[test]
fn test_lazy_arithmetic() {
    let scaled = (col("sepal_width") * lit(100)).alias("super_wide");
    let lf = get_df()
        .lazy()
        .select(&[scaled])
        .sort(["super_wide"], SortMultipleOptions::default());

    print_plans(&lf);

    let new = lf.collect().unwrap();
    println!("{new:?}");
    assert_eq!(new.height(), 7);

    // After the sort, the first entry of the scaled column is expected to be 300.0.
    let super_wide = new.column("super_wide").unwrap().f64().unwrap();
    assert_eq!(super_wide.get(0), Some(300.0));
}
73
74
/// A filter on `sepal_width` followed by an aliased projection of `variety`
/// must plan and collect without error.
#[test]
fn test_lazy_logical_plan_filter_and_alias_combined() {
    let predicate = col("sepal_width").lt(lit(3.5));
    let projection = [col("variety").alias("foo")];
    let lf = get_df().lazy().filter(predicate).select(&projection);

    print_plans(&lf);
    let df = lf.collect().unwrap();
    println!("{df:?}");
}
86
87
/// The schema computed from a logical plan must contain the alias introduced
/// by a projection and the aggregated column of a group-by.
#[test]
fn test_lazy_logical_plan_schema() {
    let df = get_df();

    // Projection with an alias: the schema must expose the new name.
    let projected = df.clone().lazy().select(&[col("variety").alias("foo")]);
    let schema = projected.logical_plan.compute_schema().unwrap();
    assert!(schema.get("foo").is_some());

    // Aggregation: the aggregated column must be present in the schema.
    let aggregated = df
        .lazy()
        .group_by([col("variety")])
        .agg([col("sepal_width").min()]);
    let schema = aggregated.logical_plan.compute_schema().unwrap();
    assert!(schema.get("sepal_width").is_some());
}
105
106
/// A left join on `days` must collect successfully without a projection, with
/// a projection of a left-hand column, and with a projection that includes
/// the `rain_right` column.
#[test]
fn test_lazy_logical_plan_join() {
    let left = df!(
        "days" => &[0, 1, 2, 3, 4],
        "temp" => [22.1, 19.9, 7., 2., 3.],
        "rain" => &[0.1, 0.2, 0.3, 0.4, 0.5]
    )
    .unwrap();
    let right = df!(
        "days" => &[1, 2],
        "rain" => &[0.1, 0.2]
    )
    .unwrap();

    // Every scenario below starts from the same left join keyed on `days`.
    let joined = || {
        left.clone()
            .lazy()
            .left_join(right.clone().lazy(), col("days"), col("days"))
    };

    // Check that optimization succeeds without a selection.
    let plain = joined();
    print_plans(&plain);
    // Implicitly checks logical plan == optimized logical plan.
    let _df = plain.collect().unwrap();

    // Check that optimization succeeds with a selection.
    let with_temp = joined().select(&[col("temp")]);
    let _df = with_temp.collect().unwrap();

    // Check that optimization succeeds when selecting a column renamed by the join.
    let with_renamed = joined().select(&[col("temp"), col("rain_right")]);
    print_plans(&with_renamed);
    let _df = with_renamed.collect().unwrap();
}
154
155